[med-svn] [gmap] 01/04: Imported Upstream version 2014-12-29
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Mon Jun 1 13:46:49 UTC 2015
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 15c4dbd734f25d689228d0e75e34978c603b443e
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Mon Jun 1 15:12:58 2015 +0200
Imported Upstream version 2014-12-29
---
ChangeLog | 60 ++
VERSION | 2 +-
configure | 24 +-
src/chimera.c | 3 +-
src/iit-read.c | 64 +-
src/list.c | 16 +-
src/list.h | 3 +-
src/pair.c | 48 +-
src/pair.h | 4 +-
src/stage3hr.c | 2422 +++++++++++++++++++++++++++++++++++++++++--------------
src/substring.c | 105 ++-
src/substring.h | 17 +-
12 files changed, 2049 insertions(+), 719 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 822b46c..472d40a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,63 @@
+2015-05-01 twu
+
+ * VERSION: Updated version number
+
+ * chimera.c: Applying patch 162196 from trunk to add range of chimerapos to
+ XT field
+
+ * iit-read.c: Applying patch 164702 from trunk to check for possibility of a
+ zero-length array in IIT_get_highs_for_low and IIT_get_lows_for_high
+
+ * stage3hr.c: Applying patch 164702 from trunk to fix order of LtoH
+ substrings for deletions
+
+2015-03-25 twu
+
+ * VERSION: Updated version number
+
+ * VERSION, public-2014-12-17, src, stage3hr.c: Merged revisions 161720
+ through 161861 from trunk to fix --clip-overlap and --merge-overlap
+
+2015-03-23 twu
+
+ * src, stage3hr.c: Merged revision 161669 from trunk to do final
+ test_hardclip
+
+ * src, substring.c: Merged revision 161663 from trunk to fix alias_circular
+ and unalias_circular for genomicstart_adj
+
+2015-03-22 twu
+
+ * src, stage3hr.c: Merged revision 161659 from trunk to change endpoint test
+ in Stage3end_substring_low
+
+ * src, stage3hr.c, substring.c, substring.h: Merged revisions 161649 through
+ 161651 from trunk to add genomicstart_adj and genomicend_adj
+
+2015-03-21 twu
+
+ * src, stage3hr.c, substring.c, substring.h: Merged revisions 161638 and
+ 161639 to restore genomicstart for substring2 of insertions and deletions,
+ and to change Substring_convert_to_pairs instead
+
+ * public-2014-12-17: Updated version number
+
+ * pair.c, src: Merged revision 161633 from trunk to make
+ Pairarray_contains_p routine look for any gap or indel
+
+ * public-2014-12-17, src, stage3hr.c: Merged revisions 161606 and 161608
+ from trunk to adjust low_querypos and high_querypos independently
+
+ * public-2014-12-17, src, stage3hr.c: Merged revision 161603 from trunk to
+ make overlap clipping more even
+
+ * src, stage3hr.c: Merged revision 161600 from trunk to fix bug in defining
+ left2 for deletion
+
+ * VERSION, list.c, list.h, pair.c, pair.h, public-2014-12-17, src,
+ stage3hr.c, substring.c: Merged revisions 161197 through 161596 from trunk
+ to fix --clip-overlap and --merge-overlap
+
2015-03-18 twu
* public-2014-12-17, src, stage3hr.c: Merged revision 161197 from trunk to
diff --git a/VERSION b/VERSION
index 4568d14..ab18138 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2014-12-25
\ No newline at end of file
+2014-12-29
\ No newline at end of file
diff --git a/configure b/configure
index 1631267..7ba6b5b 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2014-12-25.
+# Generated by GNU Autoconf 2.63 for gmap 2014-12-29.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2014-12-25'
-PACKAGE_STRING='gmap 2014-12-25'
+PACKAGE_VERSION='2014-12-29'
+PACKAGE_STRING='gmap 2014-12-29'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -1512,7 +1512,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2014-12-25 to adapt to many kinds of systems.
+\`configure' configures gmap 2014-12-29 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1583,7 +1583,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2014-12-25:";;
+ short | recursive ) echo "Configuration of gmap 2014-12-29:";;
esac
cat <<\_ACEOF
@@ -1718,7 +1718,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2014-12-25
+gmap configure 2014-12-29
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1732,7 +1732,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2014-12-25, which was
+It was created by gmap $as_me 2014-12-29, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2102,8 +2102,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2014-12-25" >&5
-$as_echo "2014-12-25" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2014-12-29" >&5
+$as_echo "2014-12-29" >&6; }
### Read defaults
@@ -4162,7 +4162,7 @@ fi
# Define the identity of the package.
PACKAGE=gmap
- VERSION=2014-12-25
+ VERSION=2014-12-29
cat >>confdefs.h <<_ACEOF
@@ -26480,7 +26480,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2014-12-25, which was
+This file was extended by gmap $as_me 2014-12-29, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -26543,7 +26543,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2014-12-25
+gmap config.status 2014-12-29
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/src/chimera.c b/src/chimera.c
index 8bea549..95f7901 100644
--- a/src/chimera.c
+++ b/src/chimera.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: chimera.c 158535 2015-02-12 21:33:37Z twu $";
+static char rcsid[] = "$Id: chimera.c 164705 2015-05-01 20:26:27Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -143,6 +143,7 @@ Chimera_print_sam_tag (FILE *fp, T this, Univ_IIT_T chromosome_iit) {
fprintf(fp,",%c%s@%u..%c%s@%u",
donor_strand,donor_chr,Stage3_chrend(this->from),
acceptor_strand,acceptor_chr,Stage3_chrstart(this->to));
+ fprintf(fp,",%d..%d",this->chimerapos+1,this->equivpos+1);
if (alloc2p == true) {
FREE(acceptor_chr);
}
diff --git a/src/iit-read.c b/src/iit-read.c
index a95e60f..3fdebda 100644
--- a/src/iit-read.c
+++ b/src/iit-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: iit-read.c 164704 2015-05-01 20:24:48Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3721,21 +3721,28 @@ IIT_get_highs_for_low (int *nuniq, T this, int divno, Chrpos_T x) {
}
}
- /* Eliminate duplicates */
- qsort(coords,ncoords,sizeof(Chrpos_T),uint_compare_ascending);
+ if (ncoords == 0) {
+ *nuniq = 0;
+ FREE(coords);
+ return (Chrpos_T *) NULL;
- uniq = (Chrpos_T *) CALLOC(ncoords,sizeof(Chrpos_T));
- *nuniq = 0;
- prev = 0;
- for (i = 0; i < ncoords; i++) {
- if (coords[i] != prev) {
- uniq[(*nuniq)++] = coords[i];
- prev = coords[i];
+ } else {
+ /* Eliminate duplicates */
+ qsort(coords,ncoords,sizeof(Chrpos_T),uint_compare_ascending);
+
+ uniq = (Chrpos_T *) CALLOC(ncoords,sizeof(Chrpos_T));
+ *nuniq = 0;
+ prev = 0;
+ for (i = 0; i < ncoords; i++) {
+ if (coords[i] != prev) {
+ uniq[(*nuniq)++] = coords[i];
+ prev = coords[i];
+ }
}
+
+ FREE(coords);
+ return uniq;
}
-
- FREE(coords);
- return uniq;
}
}
@@ -3782,21 +3789,28 @@ IIT_get_lows_for_high (int *nuniq, T this, int divno, Chrpos_T x) {
}
}
- /* Eliminate duplicates */
- qsort(coords,ncoords,sizeof(Chrpos_T),uint_compare_descending);
+ if (ncoords == 0) {
+ *nuniq = 0;
+ FREE(coords);
+ return (Chrpos_T *) NULL;
- uniq = (Chrpos_T *) CALLOC(ncoords,sizeof(Chrpos_T));
- *nuniq = 0;
- prev = 0;
- for (i = 0; i < ncoords; i++) {
- if (coords[i] != prev) {
- uniq[(*nuniq)++] = coords[i];
- prev = coords[i];
+ } else {
+ /* Eliminate duplicates */
+ qsort(coords,ncoords,sizeof(Chrpos_T),uint_compare_descending);
+
+ uniq = (Chrpos_T *) CALLOC(ncoords,sizeof(Chrpos_T));
+ *nuniq = 0;
+ prev = 0;
+ for (i = 0; i < ncoords; i++) {
+ if (coords[i] != prev) {
+ uniq[(*nuniq)++] = coords[i];
+ prev = coords[i];
+ }
}
+
+ FREE(coords);
+ return uniq;
}
-
- FREE(coords);
- return uniq;
}
}
diff --git a/src/list.c b/src/list.c
index c16bef8..81739fa 100644
--- a/src/list.c
+++ b/src/list.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: list.c 148359 2014-09-19 22:09:34Z twu $";
+static char rcsid[] = "$Id: list.c 161598 2015-03-21 02:37:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -53,6 +53,20 @@ List_pop (T list, void **x) {
}
}
+T
+List_pop_out (T list, void **x) {
+ T head;
+
+ if (list) {
+ head = list->rest;
+ *x = list->first;
+ FREE_OUT(list);
+ return head;
+ } else {
+ return list;
+ }
+}
+
void *
List_head (T list) {
return list->first;
diff --git a/src/list.h b/src/list.h
index 2cad179..c8b581e 100644
--- a/src/list.h
+++ b/src/list.h
@@ -1,4 +1,4 @@
-/* $Id: list.h 148359 2014-09-19 22:09:34Z twu $ */
+/* $Id: list.h 161598 2015-03-21 02:37:54Z twu $ */
#ifndef LIST_INCLUDED
#define LIST_INCLUDED
@@ -9,6 +9,7 @@ extern T List_push (T list, void *x);
extern T List_push_keep (T list, void *x);
extern T List_push_out (T list, void *x);
extern T List_pop (T list, void **x);
+extern T List_pop_out (T list, void **x);
extern void *List_head (T list);
extern T List_next (T list);
extern void List_head_set (T list, void *x);
diff --git a/src/pair.c b/src/pair.c
index 8120dc3..b20ead6 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 158535 2015-02-12 21:33:37Z twu $";
+static char rcsid[] = "$Id: pair.c 161635 2015-03-21 20:17:29Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8491,29 +8491,63 @@ Pair_binary_search_descending (int *querypos, int lowi, int highi, struct T *pai
#ifndef PMAP
+/* Assumes querypos is in ascending order. Need to look for worst
+ case, so go to querypos, and then check all pairs for that
+ querypos. This also guarantees that the querypos value is unique,
+ since a second value must be due to an indel. */
bool
Pairarray_contains_p (struct T *pairarray, int npairs, int querypos) {
int i;
+
+ i = 0;
+ while (i < npairs && pairarray[i].querypos < querypos) {
+ i++;
+ }
+
+ if (i >= npairs || pairarray[i].querypos > querypos) {
+ return false;
+ } else {
+ while (i < npairs && pairarray[i].querypos == querypos) {
+ if (pairarray[i].gapp == true) {
+ return false;
+ } else if (pairarray[i].cdna == ' ') {
+ return false;
+ } else if (pairarray[i].genome == ' ') {
+ return false;
+ } else {
+ /* Withhold judgement */
+ i++;
+ }
+ }
+
+ return true;
+ }
+}
+
+
+Chrpos_T
+Pairarray_lookup (struct T *pairarray, int npairs, int querypos) {
+ int i;
T pair;
for (i = 0; i < npairs; i++) {
pair = &(pairarray[i]);
if (pair->querypos > querypos) {
- return false;
+ /* continue */
} else if (pair->querypos < querypos) {
/* continue */
} else if (pair->gapp == true) {
- return false;
+ /* continue */
} else if (pair->cdna == ' ') {
- return false;
+ /* continue */
} else if (pair->genome == ' ') {
- return false;
+ /* continue */
} else {
- return true;
+ return pair->genomepos;
}
}
- return false;
+ return 0;
}
diff --git a/src/pair.h b/src/pair.h
index be36a93..5ed76d9 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 158535 2015-02-12 21:33:37Z twu $ */
+/* $Id: pair.h 161598 2015-03-21 02:37:54Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -319,6 +319,8 @@ Pair_binary_search_descending (int *querypos, int lowi, int highi, struct T *pai
extern bool
Pairarray_contains_p (struct T *pairarray, int npairs, int querypos);
extern Chrpos_T
+Pairarray_lookup (struct T *pairarray, int npairs, int querypos);
+extern Chrpos_T
Pair_genomicpos_low (int hardclip_low, int hardclip_high, struct T *pairarray, int npairs, int querylength,
bool watsonp, bool hide_soft_clips_p);
#endif
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 8267333..0ea9073 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 161199 2015-03-18 18:26:53Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 164703 2015-05-01 20:24:10Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1038,7 +1038,7 @@ Stage3end_substring_low (T this, int trim_low) {
this->querylength_adj - trim_low,this->querylength_adj,trim_low);
}
#endif
- while (p != NULL && Substring_querystart((Substring_T) List_head(p)) >= this->querylength_adj - trim_low) {
+ while (p != NULL && Substring_querystart((Substring_T) List_head(p)) > this->querylength_adj - trim_low) {
p = List_next(p);
#ifdef DEBUG13
if (p != NULL) {
@@ -1816,36 +1816,53 @@ Stage3pair_filter_nonconcordant (List_T hitpairs) {
static Univcoord_T
gmap5_substring3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substring_T substring) {
Univcoord_T chroffset;
+ Chrpos_T start, end;
int i;
chroffset = hit3->chroffset;
if (hit5->plusp == true) {
- debug13(printf("plus goal: %u up to %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
+ start = Substring_alignstart_trim(substring) - chroffset;
+ end = Substring_alignend_trim(substring) - 1U - chroffset; /* inclusive */
+ debug13(printf("plus goal: %u up to %u\n",start,end));
i = 0;
while (i < hit5->npairs) {
debug13(printf(" pair %d: genomepos %u\n",i,hit5->pairarray[i].genomepos));
- if (hit5->pairarray[i].genomepos < Substring_alignstart_trim(substring) - chroffset) {
+ if (hit5->pairarray[i].gapp == true) {
+ /* Skip intron */
i++;
- } else if (hit5->pairarray[i].genomepos > Substring_alignend_trim(substring) - chroffset) {
+ } else if (hit5->pairarray[i].cdna == ' ' || hit5->pairarray[i].genome == ' ') {
+ /* Skip indel */
+ i++;
+ } else if (hit5->pairarray[i].genomepos < start) {
+ i++;
+ } else if (hit5->pairarray[i].genomepos > end) {
i++;
} else {
- debug13(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos + chroffset));
+ debug13(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos));
return hit5->pairarray[i].genomepos + chroffset;
}
}
return 0;
} else {
- debug13(printf("minus goal: %u down to %u\n",Substring_alignend_trim(substring) - chroffset,Substring_alignstart_trim(substring) - chroffset));
+ start = Substring_alignstart_trim(substring) - 1U - chroffset; /* inclusive */
+ end = Substring_alignend_trim(substring) - chroffset;
+ debug13(printf("minus goal: %u up to %u\n",end,start));
i = hit5->npairs - 1;
while (i >= 0) {
debug13(printf(" pair %d: genomepos %u\n",i,hit5->pairarray[i].genomepos));
- if (hit5->pairarray[i].genomepos > Substring_alignstart_trim(substring) - chroffset) {
+ if (hit5->pairarray[i].gapp == true) {
+ /* Skip intron */
+ i--;
+ } else if (hit5->pairarray[i].cdna == ' ' || hit5->pairarray[i].genome == ' ') {
+ /* Skip indel */
i--;
- } else if (hit5->pairarray[i].genomepos < Substring_alignend_trim(substring) - chroffset) {
+ } else if (hit5->pairarray[i].genomepos > start) {
+ i--;
+ } else if (hit5->pairarray[i].genomepos < end) {
i--;
} else {
- debug13(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos + chroffset));
+ debug13(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos));
return hit5->pairarray[i].genomepos + chroffset;
}
}
@@ -1856,36 +1873,54 @@ gmap5_substring3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
static Univcoord_T
substring5_gmap3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substring_T substring) {
Univcoord_T chroffset;
+ Chrpos_T start, end;
int j;
chroffset = hit5->chroffset;
if (hit5->plusp == true) {
- debug13(printf("plus goal: %u up to %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
+ start = Substring_alignstart_trim(substring) - chroffset;
+ end = Substring_alignend_trim(substring) - 1U - chroffset; /* inclusive */
+ debug13(printf("plus goal: %u up to %u\n",start,end));
+
j = 0;
while (j < hit3->npairs) {
debug13(printf(" pair %d: genomepos %u\n",j,hit3->pairarray[j].genomepos));
- if (hit3->pairarray[j].genomepos < Substring_alignstart_trim(substring) - chroffset) {
+ if (hit3->pairarray[j].gapp == true) {
+ /* Skip intron */
j++;
- } else if (hit3->pairarray[j].genomepos > Substring_alignend_trim(substring) - chroffset) {
+ } else if (hit3->pairarray[j].cdna == ' ' || hit3->pairarray[j].genome == ' ') {
+ /* Skip indel */
+ j++;
+ } else if (hit3->pairarray[j].genomepos < start) {
+ j++;
+ } else if (hit3->pairarray[j].genomepos > end) {
j++;
} else {
- debug13(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos + chroffset));
+ debug13(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos));
return hit3->pairarray[j].genomepos + chroffset;
}
}
return 0;
} else {
- debug13(printf("minus goal: %u down to %u\n",Substring_alignstart_trim(substring) - chroffset,Substring_alignend_trim(substring) - chroffset));
+ start = Substring_alignstart_trim(substring) - 1U - chroffset; /* inclusive */
+ end = Substring_alignend_trim(substring) - chroffset;
+ debug13(printf("minus goal: %u up to %u\n",end,start));
j = hit3->npairs - 1;
while (j >= 0) {
debug13(printf(" pair %d: genomepos %u\n",j,hit3->pairarray[j].genomepos));
- if (hit3->pairarray[j].genomepos > Substring_alignstart_trim(substring) - chroffset) {
+ if (hit3->pairarray[j].gapp == true) {
+ /* Skip intron */
+ j--;
+ } else if (hit3->pairarray[j].cdna == ' ' || hit3->pairarray[j].genome == ' ') {
+ /* Skip indel */
j--;
- } else if (hit3->pairarray[j].genomepos < Substring_alignend_trim(substring) - chroffset) {
+ } else if (hit3->pairarray[j].genomepos > start) {
+ j--;
+ } else if (hit3->pairarray[j].genomepos < end) {
j--;
} else {
- debug13(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos + chroffset));
+ debug13(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos));
return hit3->pairarray[j].genomepos + chroffset;
}
}
@@ -1894,7 +1929,8 @@ substring5_gmap3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
}
-static void
+/* Returns true if ilengths are valid */
+static bool
find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T common_genomicpos, Univcoord_T chroffset) {
int i;
@@ -1907,7 +1943,7 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
i++;
}
if (i >= hit->npairs) {
- abort();
+ return false;
} else if (hit->plusp == true) {
*ilength_low = hit->pairarray[i].querypos - hit->pairarray[0].querypos + 1;
*ilength_high = hit->pairarray[hit->npairs - 1].querypos - hit->pairarray[i].querypos + 1;
@@ -1920,16 +1956,20 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
} else if (hit->plusp == true) {
debug13(printf("plus. Checking common genomicpos %llu against substring0 %p, substring1 %p, substring2 %p\n",
common_genomicpos,hit->substring0,hit->substring1,hit->substring2));
- /* Add + 1 when subtracting alignstart, but not when starting from alignend */
+ /* Plus: Subtract 1 from alignend */
if (Substring_overlap_point_trimmed_p(hit->substring0,common_genomicpos)) {
- debug13(printf("substring0: %u..%u\n",Substring_alignstart_trim(hit->substring0),Substring_alignend_trim(hit->substring0)));
+ debug13(printf("substring0: %u..%u\n",
+ Substring_alignstart_trim(hit->substring0) - hit->chroffset,
+ Substring_alignend_trim(hit->substring0) - 1U - hit->chroffset));
*ilength_low = (common_genomicpos - Substring_alignstart_trim(hit->substring0) + 1);
*ilength_high = ((Substring_alignend_trim(hit->substring0) - 1) - common_genomicpos + 1)
+ Substring_genomic_alignment_length(hit->substring1)
+ Substring_genomic_alignment_length(hit->substring2);
} else if (Substring_overlap_point_trimmed_p(hit->substring1,common_genomicpos)) {
- debug13(printf("substring1: %u..%u\n",Substring_alignstart_trim(hit->substring1),Substring_alignend_trim(hit->substring1)));
+ debug13(printf("substring1: %u..%u\n",
+ Substring_alignstart_trim(hit->substring1) - hit->chroffset,
+ Substring_alignend_trim(hit->substring1) - 1U - hit->chroffset));
*ilength_low = Substring_genomic_alignment_length(hit->substring0) +
(common_genomicpos - Substring_alignstart_trim(hit->substring1) + 1);
*ilength_high = ((Substring_alignend_trim(hit->substring1) - 1) - common_genomicpos + 1)
@@ -1939,7 +1979,9 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
}
} else if (Substring_overlap_point_trimmed_p(hit->substring2,common_genomicpos)) {
- debug13(printf("substring2: %u..%u\n",Substring_alignstart_trim(hit->substring2),Substring_alignend_trim(hit->substring2)));
+ debug13(printf("substring2: %u..%u\n",
+ Substring_alignstart_trim(hit->substring2) - hit->chroffset,
+ Substring_alignend_trim(hit->substring2) - 1U - hit->chroffset));
*ilength_low = Substring_genomic_alignment_length(hit->substring0) +
Substring_genomic_alignment_length(hit->substring1) +
(common_genomicpos - Substring_alignstart_trim(hit->substring2) + 1);
@@ -1949,34 +1991,41 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
}
} else {
- abort();
+ return false;
}
debug13(printf("Plus: Have ilength_low %d and ilength_high %d\n",*ilength_low,*ilength_high));
} else {
debug13(printf("minus. Checking common genomicpos %llu against substring0 %p, substring1 %p, substring2 %p\n",
common_genomicpos,hit->substring0,hit->substring1,hit->substring2));
+ /* Minus: Subtract 1 from alignstart */
if (Substring_overlap_point_trimmed_p(hit->substring0,common_genomicpos)) {
- debug13(printf("substring0: %u..%u\n",Substring_alignstart_trim(hit->substring0),Substring_alignend_trim(hit->substring0)));
+ debug13(printf("substring0: %u..%u\n",
+ Substring_alignstart_trim(hit->substring0) - 1U - hit->chroffset,
+ Substring_alignend_trim(hit->substring0) - hit->chroffset));
*ilength_low = Substring_genomic_alignment_length(hit->substring2) +
Substring_genomic_alignment_length(hit->substring1) +
- (common_genomicpos - (Substring_alignend_trim(hit->substring0) + 1) + 1);
- *ilength_high = (Substring_alignstart_trim(hit->substring0) - common_genomicpos + 1);
+ (common_genomicpos - (Substring_alignend_trim(hit->substring0) /*+ 1*/) + 1);
+ *ilength_high = ((Substring_alignstart_trim(hit->substring0) - 1) - common_genomicpos + 1);
} else if (Substring_overlap_point_trimmed_p(hit->substring1,common_genomicpos)) {
- debug13(printf("substring1: %u..%u\n",Substring_alignstart_trim(hit->substring1),Substring_alignend_trim(hit->substring1)));
+ debug13(printf("substring1: %u..%u\n",
+ Substring_alignstart_trim(hit->substring1) - 1U - hit->chroffset,
+ Substring_alignend_trim(hit->substring1) - hit->chroffset));
*ilength_low = Substring_genomic_alignment_length(hit->substring2) +
- (common_genomicpos - (Substring_alignend_trim(hit->substring1) + 1) + 1);
- *ilength_high = (Substring_alignstart_trim(hit->substring1) - common_genomicpos + 1)
+ (common_genomicpos - (Substring_alignend_trim(hit->substring1) /*+ 1*/) + 1);
+ *ilength_high = ((Substring_alignstart_trim(hit->substring1) - 1) - common_genomicpos + 1)
+ Substring_genomic_alignment_length(hit->substring0);
if (hit->hittype == INSERTION) {
*ilength_low += hit->nindels;
}
} else if (Substring_overlap_point_trimmed_p(hit->substring2,common_genomicpos)) {
- debug13(printf("substring2: %u..%u\n",Substring_alignstart_trim(hit->substring2),Substring_alignend_trim(hit->substring2)));
- *ilength_low = (common_genomicpos - (Substring_alignend_trim(hit->substring2) + 1) + 1);
- *ilength_high = (Substring_alignstart_trim(hit->substring2) - common_genomicpos + 1)
+ debug13(printf("substring2: %u..%u\n",
+ Substring_alignstart_trim(hit->substring2) - 1U - hit->chroffset,
+ Substring_alignend_trim(hit->substring2) - hit->chroffset));
+ *ilength_low = (common_genomicpos - (Substring_alignend_trim(hit->substring2) /*+ 1*/) + 1);
+ *ilength_high = ((Substring_alignstart_trim(hit->substring2) - 1) - common_genomicpos + 1)
+ Substring_genomic_alignment_length(hit->substring1)
+ Substring_genomic_alignment_length(hit->substring0);
if (hit->hittype == INSERTION) {
@@ -1984,12 +2033,12 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
}
} else {
- abort();
+ return false;
}
debug13(printf("Minus: Have ilength_low %d and ilength_high %d\n",*ilength_low,*ilength_high));
}
- return;
+ return true;
}
@@ -2006,7 +2055,19 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
if (hit5->plusp == true) {
i = j = 0;
while (i < hit5->npairs && j < hit3->npairs) {
- if (hit5->pairarray[i].genomepos < hit3->pairarray[j].genomepos) {
+ if (hit5->pairarray[i].gapp == true) {
+ /* Skip intron */
+ i++;
+ } else if (hit5->pairarray[i].cdna == ' ' || hit5->pairarray[i].genome == ' ') {
+ /* Skip indel */
+ i++;
+ } else if (hit3->pairarray[j].gapp == true) {
+ /* Skip intron */
+ j++;
+ } else if (hit3->pairarray[j].cdna == ' ' || hit3->pairarray[j].genome == ' ') {
+ /* Skip indel */
+ j++;
+ } else if (hit5->pairarray[i].genomepos < hit3->pairarray[j].genomepos) {
i++;
} else if (hit5->pairarray[i].genomepos > hit3->pairarray[j].genomepos) {
j++;
@@ -2022,7 +2083,19 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
i = j = 0;
while (i < hit5->npairs && j < hit3->npairs) {
- if (hit5->pairarray[i].genomepos > hit3->pairarray[j].genomepos) {
+ if (hit5->pairarray[i].gapp == true) {
+ /* Skip intron */
+ i++;
+ } else if (hit5->pairarray[i].cdna == ' ' || hit5->pairarray[i].genome == ' ') {
+ /* Skip indel */
+ i++;
+ } else if (hit3->pairarray[j].gapp == true) {
+ /* Skip intron */
+ j++;
+ } else if (hit3->pairarray[j].cdna == ' ' || hit3->pairarray[j].genome == ' ') {
+ /* Skip indel */
+ j++;
+ } else if (hit5->pairarray[i].genomepos > hit3->pairarray[j].genomepos) {
i++;
} else if (hit5->pairarray[i].genomepos < hit3->pairarray[j].genomepos) {
j++;
@@ -2065,9 +2138,11 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
debug13(printf("Computing overlap using substrings plus/plus\n"));
start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_length;
- end5 = hit5->genomicend - hit5->trim_right - hit5->end_amb_length;
+ end5 = (hit5->genomicend - 1) - hit5->trim_right - hit5->end_amb_length;
start3 = hit3->genomicstart + hit3->trim_left + hit3->start_amb_length;
- end3 = hit3->genomicend - hit3->trim_right - hit3->end_amb_length;
+ end3 = (hit3->genomicend - 1) - hit3->trim_right - hit3->end_amb_length;
+ debug13(printf("hit5 endpoints are %u..%u. hit3 endpoints are %u..%u\n",
+ start5-hit5->chroffset,end5-hit5->chroffset,start3-hit3->chroffset,end3-hit3->chroffset));
if (end3 < start5) {
/* Case 1 */
@@ -2078,7 +2153,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else if (start3 < start5) {
if (end3 < end5) {
/* Case 2: Tails overlap. Go from start5 to end3 */
- debug13(printf("plus case 2a: start5 %u\n",start5 - hit5->chroffset));
+ debug13(printf("plus/plus case 2a: start5 %u\n",start5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
@@ -2088,7 +2163,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
/* Case 2: Tails overlap. Go from start5 to end3 */
- debug13(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
+ debug13(printf("plus/plus case 2b: end3 %u\n",end3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
@@ -2100,7 +2175,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
/* Case 3: hit3 subsumes hit5 */
- debug13(printf("plus case 3\n"));
+ debug13(printf("plus/plus case 3\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
@@ -2114,7 +2189,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
if (end3 < end5) {
/* Case 4: hit5 subsumes hit3 */
- debug13(printf("plus case 4\n"));
+ debug13(printf("plus/plus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
@@ -2126,7 +2201,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
/* Case 5: Based on hit3_trimmed_length */
- debug13(printf("plus case 5a\n"));
+ debug13(printf("plus/plus case 5a\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
@@ -2136,7 +2211,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
/* Case 5: Based on hit5_trimmed_length */
- debug13(printf("plus case 5b\n"));
+ debug13(printf("plus/plus case 5b\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
@@ -2191,6 +2266,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else if (hit5->plusp == true && hit3->plusp == false) {
/* plus/minus */
debug13(printf("Computing overlap using substrings plus/minus\n"));
+ return 0;
start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_length;
end5 = hit5->genomicend - hit5->trim_right - hit5->end_amb_length;
@@ -2319,6 +2395,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else if (hit5->plusp == false && hit3->plusp == true) {
/* minus/plus */
debug13(printf("Computing overlap using substrings minus/plus\n"));
+ return 0;
start5 = hit5->genomicstart - hit5->trim_left - hit5->start_amb_length;
end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_length;
@@ -2448,10 +2525,12 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
/* minus/minus */
debug13(printf("Computing overlap using substrings minus/minus\n"));
- start5 = hit5->genomicstart - hit5->trim_left - hit5->start_amb_length;
+ start5 = (hit5->genomicstart - 1) - hit5->trim_left - hit5->start_amb_length;
end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_length;
- start3 = hit3->genomicstart - hit3->trim_left - hit3->start_amb_length;
+ start3 = (hit3->genomicstart - 1) - hit3->trim_left - hit3->start_amb_length;
end3 = hit3->genomicend + hit3->trim_right + hit3->end_amb_length;
+ debug13(printf("hit5 endpoints are %u..%u. hit3 endpoints are %u..%u\n",
+ start5-hit5->chroffset,end5-hit5->chroffset,start3-hit3->chroffset,end3-hit3->chroffset));
if (end3 > start5) {
/* Case 1 */
@@ -2582,458 +2661,1175 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
-/* Replaces adjust_hardclips in samprint.c. */
-static void
-adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
- int *hardclip_high, Stage3end_T hit_high, int high_querylength) {
- int orig_hardclip_low, orig_hardclip_high;
+static bool
+test_hardclips (Univcoord_T *common_genomicpos, int hardclip_low, Stage3end_T hit_low, int low_querylength,
+ int hardclip_high, Stage3end_T hit_high, int high_querylength, Univcoord_T chroffset) {
Substring_T low_substring, high_substring;
struct Pair_T *low_pairarray, *high_pairarray;
- int low_querystart, low_queryend, low_npairs, high_npairs;
+ int low_npairs, high_npairs;
int low_querypos, high_querypos;
bool plusp;
- debug13(printf("Entering adjust_hardclips with hardclip_low %d, hardclip_high %d\n",
- *hardclip_low,*hardclip_high));
- orig_hardclip_low = *hardclip_low;
- orig_hardclip_high = *hardclip_high;
+ debug13(printf("Entering test_hardclips with hardclip_low %d, hardclip_high %d\n",
+ hardclip_low,hardclip_high));
plusp = Stage3end_plusp(hit_low);
if (Stage3end_hittype(hit_low) == GMAP && Stage3end_hittype(hit_high) == GMAP) {
- debug13(printf("Dual GMAP\n"));
low_pairarray = Stage3end_pairarray(hit_low);
low_npairs = Stage3end_npairs(hit_low);
high_pairarray = Stage3end_pairarray(hit_high);
high_npairs = Stage3end_npairs(hit_high);
- if (plusp == true) {
- low_querypos = *hardclip_low;
- high_querypos = high_querylength - 1 - (*hardclip_high);
-
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false)) {
- /* Shift clip querypos upward (rightward on chromosome) */
- debug13(printf("Low GMAP does not contain %d or high GMAP does not contain lower querypos %d => ",
- low_querypos-1,high_querypos-1));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_querypos >= 0 && high_querypos >= 0 &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false)) {
- /* Shift clip querypos downward (leftward on chromosome) */
- debug13(printf("Low GMAP does not contain %d or high GMAP does not contain higher querypos %d => ",
- low_querypos+1,high_querypos+1));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- if (low_querypos < 0 || low_querypos >= low_querylength ||
- high_querypos < 0 || high_querypos >= high_querylength) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
-
- } else {
- low_querypos = low_querylength - 1 - (*hardclip_low);
- high_querypos = *hardclip_high;
-
- while (low_querypos >= 0 && high_querypos >= 0 &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_querypos >= 0 && high_querypos >= 0 &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false)) {
- /* Shift clip querypos downward (rightward on chromosome) */
- debug13(printf("Low GMAP does not contain %d or high GMAP does not contain higher querypos %d => ",
- low_querypos+1,high_querypos+1));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false)) {
- /* Shift clip querypos upward (leftward on chromosome) */
- debug13(printf("Low GMAP does not contain %d or high GMAP does not contain lower querypos %d => ",
- low_querypos-1,high_querypos-1));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos++;
- high_querypos++;
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- if (low_querypos < 0 || low_querypos >= low_querylength ||
- high_querypos < 0 || high_querypos >= high_querylength) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
+ if (plusp == true) {
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ }
+ debug13(printf("Dual GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ debug13(printf("Fails because low_querypos %d is not in low_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false) {
+ debug13(printf("Fails because low_querypos %d - 1 is not in low_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false) {
+ debug13(printf("Fails because low_querypos %d + 1 is not in low_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ debug13(printf("Fails because high_querypos %d is not in high_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false) {
+ debug13(printf("Fails because high_querypos %d - 1 is not in high_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false) {
+ debug13(printf("Fails because high_querypos %d + 1 is not in high_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Pairarray_lookup(high_pairarray,high_npairs,high_querypos)) {
+ debug13(printf("Fails because low genomicpos %u != high genomicpos %u\n",
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos),
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos)));
+ return false;
+ } else {
+ *common_genomicpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
+ debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ return true;
+ }
+
+ } else if (Stage3end_hittype(hit_low) == GMAP) {
+ low_pairarray = Stage3end_pairarray(hit_low);
+ low_npairs = Stage3end_npairs(hit_low);
+
+ if (plusp == true) {
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ }
+ debug13(printf("Low GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ debug13(printf("Fails because low_querypos %d is not in low_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false) {
+ debug13(printf("Fails because low_querypos %d - 1 is not in low_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false) {
+ debug13(printf("Fails because low_querypos %d + 1 is not in low_pairarray\n",low_querypos));
+ return false;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ debug13(printf("Fails because high_querypos %d gives a NULL substring\n",high_querypos));
+ return false;
+ } else if (Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring) {
+ debug13(printf("Fails because high_querypos %d - 1 gives substring %p\n",
+ high_querypos,Stage3end_substring_containing(hit_high,high_querypos-1)));
+ return false;
+ } else if (Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring) {
+ debug13(printf("Fails because high_querypos %d + 1 gives substring %p\n",
+ high_querypos,Stage3end_substring_containing(hit_high,high_querypos+1)));
+ return false;
+ } else if (plusp == true) {
+ if (Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset) {
+ debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos),
+ Substring_genomicstart_adj(high_substring) + high_querypos - chroffset));
+ return false;
+ }
+ } else {
+ if (Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset) {
+ debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos),
+ (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset));
+ return false;
+ }
+ }
+
+ *common_genomicpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
+ debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ return true;
+
+ } else if (Stage3end_hittype(hit_high) == GMAP) {
+ high_pairarray = Stage3end_pairarray(hit_high);
+ high_npairs = Stage3end_npairs(hit_high);
+
+ if (plusp == true) {
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ }
+ debug13(printf("High GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ debug13(printf("Fails because low_querypos %d gives a NULL substring\n",low_querypos));
+ return false;
+ } else if (Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring) {
+ debug13(printf("Fails because low_querypos %d - 1 gives substring %p\n",
+ low_querypos,Stage3end_substring_containing(hit_low,low_querypos-1)));
+ return false;
+ } else if (Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring) {
+ debug13(printf("Fails because low_querypos %d + 1 gives substring %p\n",
+ low_querypos,Stage3end_substring_containing(hit_low,low_querypos+1)));
+ return false;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ debug13(printf("Fails because high_querypos %d is not in high_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false) {
+ debug13(printf("Fails because high_querypos %d - 1 is not in high_pairarray\n",low_querypos));
+ return false;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false) {
+ debug13(printf("Fails because high_querypos %d + 1 is not in high_pairarray\n",low_querypos));
+ return false;
+ } else if (plusp == true) {
+ if (Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != Substring_genomicstart_adj(low_substring) + low_querypos - chroffset) {
+ debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
+ Substring_genomicstart_adj(low_substring) + low_querypos - chroffset,
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos)));
+ return false;
+ }
+ } else {
+ if (Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset) {
+ debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
+ (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset,
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos)));
+ return false;
+ }
+ }
+
+ *common_genomicpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos) + chroffset;
+ debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ return true;
+
+ } else {
+ if (plusp == true) {
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ debug13(printf("Both substrings, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ debug13(printf("Fails because low_querypos %d gives a NULL substring\n",low_querypos));
+ return false;
+ } else if (Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring) {
+ debug13(printf("Fails because low_querypos %d - 1 gives substring %p\n",
+ low_querypos,Stage3end_substring_containing(hit_low,low_querypos-1)));
+ return false;
+ } else if (Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring) {
+ debug13(printf("Fails because low_querypos %d + 1 gives substring %p\n",
+ low_querypos,Stage3end_substring_containing(hit_low,low_querypos+1)));
+ return false;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ debug13(printf("Fails because high_querypos %d gives a NULL substring\n",high_querypos));
+ return false;
+ } else if (Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring) {
+ debug13(printf("Fails because high_querypos %d - 1 gives substring %p\n",
+ high_querypos,Stage3end_substring_containing(hit_high,high_querypos-1)));
+ return false;
+ } else if (Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring) {
+ debug13(printf("Fails because high_querypos %d + 1 gives substring %p\n",
+ high_querypos,Stage3end_substring_containing(hit_high,high_querypos+1)));
+ return false;
+ } else if (Substring_genomicstart_adj(low_substring) + low_querypos - chroffset != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset) {
+ debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
+ Substring_genomicstart_adj(low_substring) + low_querypos - chroffset,
+ Substring_genomicstart_adj(high_substring) + high_querypos - chroffset));
+ return false;
+ } else {
+ *common_genomicpos = Substring_genomicstart_adj(low_substring) + low_querypos; /* Want univcoord */
+ debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ return true;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ debug13(printf("Both substrings, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ debug13(printf("Fails because low_querypos %d gives a NULL substring\n",low_querypos));
+ return false;
+ } else if (Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring) {
+ debug13(printf("Fails because low_querypos %d - 1 gives substring %p\n",
+ low_querypos,Stage3end_substring_containing(hit_low,low_querypos-1)));
+ return false;
+ } else if (Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring) {
+ debug13(printf("Fails because low_querypos %d + 1 gives substring %p\n",
+ low_querypos,Stage3end_substring_containing(hit_low,low_querypos+1)));
+ return false;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ debug13(printf("Fails because high_querypos %d gives a NULL substring\n",high_querypos));
+ return false;
+ } else if (Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring) {
+ debug13(printf("Fails because high_querypos %d - 1 gives substring %p\n",
+ high_querypos,Stage3end_substring_containing(hit_high,high_querypos-1)));
+ return false;
+ } else if (Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring) {
+ debug13(printf("Fails because high_querypos %d + 1 gives substring %p\n",
+ high_querypos,Stage3end_substring_containing(hit_high,high_querypos+1)));
+ return false;
+ } else if ((Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset) {
+ debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
+ (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset,
+ (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset));
+ return false;
+ } else {
+ *common_genomicpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos; /* Want univcoord */
+ debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ return true;
+ }
+ }
+ }
+}
+
+
+
+/* Replaces adjust_hardclips in samprint.c */
+static Univcoord_T
+adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int low_querylength,
+ int hardclip_high, Stage3end_T hit_high, int high_querylength, Univcoord_T chroffset) {
+ Substring_T low_substring, high_substring;
+ struct Pair_T *low_pairarray, *high_pairarray;
+ int low_npairs, high_npairs;
+ int low_querypos, high_querypos;
+ Chrpos_T low_chrpos, high_chrpos;
+ bool plusp;
+
+ debug13(printf("Entering adjust_hardclips_right with hardclip_low %d, hardclip_high %d\n",
+ hardclip_low,hardclip_high));
+ *shift = 1; /* Making an initial move before each while loop */
+ plusp = Stage3end_plusp(hit_low);
+
+ if (Stage3end_hittype(hit_low) == GMAP && Stage3end_hittype(hit_high) == GMAP) {
+ low_pairarray = Stage3end_pairarray(hit_low);
+ low_npairs = Stage3end_npairs(hit_low);
+ high_pairarray = Stage3end_pairarray(hit_high);
+ high_npairs = Stage3end_npairs(hit_high);
+
+ if (plusp == true) {
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ debug13(printf("Dual GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false ||
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Pairarray_lookup(high_pairarray,high_npairs,high_querypos))) {
+ (*shift) += 1;
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ low_querypos++;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ high_querypos++;
+ } else {
+ low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
+ high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
+ if (low_chrpos < high_chrpos) {
+ debug13(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos++;
+ } else if (high_chrpos < low_chrpos) {
+ debug13(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos++;
+ } else {
+ low_querypos++;
+ high_querypos++;
+ }
+ }
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == true);
+ return Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ debug13(printf("Dual GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false ||
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Pairarray_lookup(high_pairarray,high_npairs,high_querypos))) {
+ (*shift) += 1;
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ low_querypos--;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ high_querypos--;
+ } else {
+ low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
+ high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
+ if (low_chrpos < high_chrpos) {
+ debug13(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos--;
+ } else if (high_chrpos < low_chrpos) {
+ debug13(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos--;
+ } else {
+ low_querypos--;
+ high_querypos--;
+ }
+ }
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == true);
+ return Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
+ }
+ }
+
+ } else if (Stage3end_hittype(hit_low) == GMAP) {
+ low_pairarray = Stage3end_pairarray(hit_low);
+ low_npairs = Stage3end_npairs(hit_low);
+
+ if (plusp == true) {
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ debug13(printf("Low GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ (high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
+ Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset)) {
+ (*shift) += 1;
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ low_querypos++;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ high_querypos++;
+ } else {
+ low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
+ high_chrpos = Substring_genomicstart_adj(high_substring) + high_querypos - chroffset;
+ if (low_chrpos < high_chrpos) {
+ debug13(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos++;
+ } else if (high_chrpos < low_chrpos) {
+ debug13(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos++;
+ } else {
+ low_querypos++;
+ high_querypos++;
+ }
+ }
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength) {
+ debug13(printf("Failing because low_querypos %d + 1 >= low_querylength %d\n",low_querypos,low_querylength));
+ *shift = 0;
+ return 0;
+ } else if (Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
+ debug13(printf("Failing because no substring contains high_querypos %d\n",high_querypos));
+ *shift = 0;
+ return 0;
+ } else {
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == true);
+ assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
+ return Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ debug13(printf("Low GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ (high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
+ Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset)) {
+ (*shift) += 1;
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ low_querypos--;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ high_querypos--;
+ } else {
+ low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
+ high_chrpos = (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset;
+ if (low_chrpos < high_chrpos) {
+ debug13(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos--;
+ } else if (high_chrpos < low_chrpos) {
+ debug13(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos--;
+ } else {
+ low_querypos--;
+ high_querypos--;
+ }
+ }
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0) {
+ debug13(printf("Failing because low_querypos %d - 1 < 0\n",low_querypos));
+ *shift = 0;
+ return 0;
+ } else if (Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
+ debug13(printf("Failing because no substring contains high_querypos %d\n",high_querypos));
+ *shift = 0;
+ return 0;
+ } else {
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == true);
+ assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
+ return Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
+ }
+ }
+
+ } else if (Stage3end_hittype(hit_high) == GMAP) {
+ high_pairarray = Stage3end_pairarray(hit_high);
+ high_npairs = Stage3end_npairs(hit_high);
+
+ if (plusp == true) {
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ debug13(printf("High GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((high_querypos + 1) < high_querylength && (low_querypos + 1) < low_querylength &&
+ (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false ||
+ (low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
+ Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != Substring_genomicstart_adj(low_substring) + low_querypos - chroffset)) {
+ (*shift) += 1;
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ low_querypos++;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ high_querypos++;
+ } else {
+ low_chrpos = Substring_genomicstart_adj(low_substring) + low_querypos - chroffset;
+ high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
+ if (low_chrpos < high_chrpos) {
+ debug13(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos++;
+ } else if (high_chrpos < low_chrpos) {
+ debug13(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos++;
+ } else {
+ low_querypos++;
+ high_querypos++;
+ }
+ }
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((high_querypos + 1) >= high_querylength || (low_querypos + 1) >= low_querylength ||
+ Stage3end_substring_containing(hit_low,low_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_low,low_querypos-1) == low_substring);
+ assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == true);
+ return Pairarray_lookup(high_pairarray,high_npairs,high_querypos) + chroffset;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ debug13(printf("High GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((high_querypos - 1) >= 0 && (low_querypos - 1) >= 0 &&
+ (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false ||
+ (low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
+ Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset)) {
+ (*shift) += 1;
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ low_querypos--;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ high_querypos--;
+ } else {
+ low_chrpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset;
+ high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
+ if (low_chrpos < high_chrpos) {
+ debug13(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos--;
+ } else if (high_chrpos < low_chrpos) {
+ debug13(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos--;
+ } else {
+ low_querypos--;
+ high_querypos--;
+ }
+ }
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((high_querypos - 1) < 0 || (low_querypos - 1) < 0 ||
+ Stage3end_substring_containing(hit_low,low_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_low,low_querypos-1) == low_substring);
+ assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == true);
+ return Pairarray_lookup(high_pairarray,high_npairs,high_querypos) + chroffset;
+ }
+ }
+
+ } else {
+ if (plusp == true) {
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ debug13(printf("Both substrings, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
+ ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
+ Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
+ (high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
+ Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
+ Substring_genomicstart_adj(low_substring) + low_querypos - chroffset != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset)) {
+ (*shift) += 1;
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ low_querypos++;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ high_querypos++;
+ } else {
+ low_chrpos = Substring_genomicstart_adj(low_substring) + low_querypos - chroffset;
+ high_chrpos = Substring_genomicstart_adj(high_substring) + high_querypos - chroffset;
+ if (low_chrpos < high_chrpos) {
+ debug13(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos++;
+ } else if (high_chrpos < low_chrpos) {
+ debug13(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos++;
+ } else {
+ low_querypos++;
+ high_querypos++;
+ }
+ }
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos + 1) >= low_querylength ||
+ (high_querypos + 1) >= high_querylength ||
+ (low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ debug13(printf("Returning %u + %d\n",Substring_genomicstart_adj(low_substring) - chroffset,
+ low_querypos));
+ assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
+ assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_low,low_querypos-1) == low_substring);
+ assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
+ return Substring_genomicstart_adj(low_substring) + low_querypos; /* Want univcoord */
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ debug13(printf("Both substrings, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
+ ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
+ Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
+ (high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
+ Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
+ (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset)) {
+ (*shift) += 1;
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ low_querypos--;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ high_querypos--;
+ } else {
+ low_chrpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset;
+ high_chrpos = (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset;
+ if (low_chrpos < high_chrpos) {
+ debug13(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos--;
+ } else if (high_chrpos < low_chrpos) {
+ debug13(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos--;
+ } else {
+ low_querypos--;
+ high_querypos--;
+ }
+ }
+ debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos - 1) < 0 ||
+ (high_querypos - 1) < 0 ||
+ (low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ debug13(printf("Returning %u - %d\n",Substring_genomicstart_adj(low_substring) - chroffset,
+ low_querypos));
+ assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
+ assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_low,low_querypos-1) == low_substring);
+ assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
+ return (Substring_genomicstart_adj(low_substring) - 1) - low_querypos; /* Want univcoord */
+ }
+ }
+ }
+}
+
+
+/* Replaces adjust_hardclips in samprint.c */
+static Univcoord_T
+adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int low_querylength,
+ int hardclip_high, Stage3end_T hit_high, int high_querylength, Univcoord_T chroffset) {
+ Substring_T low_substring, high_substring;
+ struct Pair_T *low_pairarray, *high_pairarray;
+ int low_npairs, high_npairs;
+ int low_querypos, high_querypos;
+ Chrpos_T low_chrpos, high_chrpos;
+ bool plusp;
+
+ debug13(printf("Entering adjust_hardclips_left with hardclip_low %d, hardclip_high %d\n",
+ hardclip_low,hardclip_high));
+ *shift = 1; /* Making an initial move before each while loop */
+ plusp = Stage3end_plusp(hit_low);
+
+ if (Stage3end_hittype(hit_low) == GMAP && Stage3end_hittype(hit_high) == GMAP) {
+ low_pairarray = Stage3end_pairarray(hit_low);
+ low_npairs = Stage3end_npairs(hit_low);
+ high_pairarray = Stage3end_pairarray(hit_high);
+ high_npairs = Stage3end_npairs(hit_high);
+
+ if (plusp == true) {
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ debug13(printf("Dual GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false ||
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Pairarray_lookup(high_pairarray,high_npairs,high_querypos))) {
+ (*shift) += 1;
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ low_querypos--;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ high_querypos--;
+ } else {
+ low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
+ high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
+ if (low_chrpos > high_chrpos) {
+ debug13(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos--;
+ } else if (high_chrpos > low_chrpos) {
+ debug13(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos--;
+ } else {
+ low_querypos--;
+ high_querypos--;
+ }
+ }
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == true);
+ return Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ debug13(printf("Dual GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
+ (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false ||
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Pairarray_lookup(high_pairarray,high_npairs,high_querypos))) {
+ (*shift) += 1;
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ low_querypos++;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ high_querypos++;
+ } else {
+ low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
+ high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
+ if (low_chrpos > high_chrpos) {
+ debug13(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos++;
+ } else if (high_chrpos > low_chrpos) {
+ debug13(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos++;
+ } else {
+ low_querypos++;
+ high_querypos++;
+ }
+ }
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == true);
+ return Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
}
}
} else if (Stage3end_hittype(hit_low) == GMAP) {
- debug13(printf("Low GMAP\n"));
low_pairarray = Stage3end_pairarray(hit_low);
low_npairs = Stage3end_npairs(hit_low);
if (plusp == true) {
- low_querypos = *hardclip_low;
- high_querypos = high_querylength - 1 - (*hardclip_high);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
-
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ debug13(printf("Low GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
- high_substring == NULL)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_querypos < low_querylength && high_substring != NULL &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
- Substring_contains_p(high_substring,high_querypos-1) == false)) {
- /* Shift clip querypos upward (rightward on chromosome) */
- debug13(printf("Low GMAP does not contain %d or high %d..%d does not contain lower querypos %d => ",
- low_querypos-1,Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos-1));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_querypos >= 0 && high_substring != NULL &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
- Substring_contains_p(high_substring,high_querypos+1) == false)) {
- /* Shift clip querypos downward (leftward on chromosome) */
- debug13(printf("Low GMAP does not contain %d or high %d..%d does not contain higher querypos %d => ",
- low_querypos+1,Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos+1));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- if (low_querypos < 0 || low_querypos >= low_querylength || high_substring == NULL) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
-
- } else {
- low_querypos = low_querylength - 1 - (*hardclip_low);
- high_querypos = *hardclip_high;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
-
- while (low_querypos >= 0 && high_querypos >= 0 &&
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ (high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
+ Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset)) {
+ (*shift) += 1;
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ low_querypos--;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ high_querypos--;
+ } else {
+ low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
+ high_chrpos = Substring_genomicstart_adj(high_substring) + high_querypos - chroffset;
+ if (low_chrpos > high_chrpos) {
+ debug13(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos--;
+ } else if (high_chrpos > low_chrpos) {
+ debug13(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos--;
+ } else {
+ low_querypos--;
+ high_querypos--;
+ }
+ }
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0 ||
+ Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == true);
+ assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
+ return Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ debug13(printf("Low GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
- high_substring == NULL)) {
- /* Shift clip querypos downward (rightward on chromosome) */
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_querypos >= 0 && high_substring != NULL &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
- Substring_contains_p(high_substring,high_querypos+1) == false)) {
- /* Shift clip querypos downward (rightward on chromosome) */
- debug13(printf("Low GMAP does not contain %d or high %d..%d does not contain higher querypos %d => ",
- low_querypos+1,Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos+1));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_querypos < low_querylength && high_substring != NULL &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
- Substring_contains_p(high_substring,high_querypos-1) == false)) {
- /* Shift clip querypos upward (leftward on chromosome) */
- debug13(printf("Low GMAP does not contain %d or high %d..%d does not contain lower querypos %d => ",
- low_querypos-1,Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos-1));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos++;
- high_querypos++;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- if (low_querypos < 0 || low_querypos >= low_querylength || high_substring == NULL) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
+ Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false ||
+ (high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
+ Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset)) {
+ (*shift) += 1;
+ if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
+ low_querypos++;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ high_querypos++;
+ } else {
+ low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
+ high_chrpos = (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset;
+ if (low_chrpos > high_chrpos) {
+ debug13(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos++;
+ } else if (high_chrpos > low_chrpos) {
+ debug13(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos++;
+ } else {
+ low_querypos++;
+ high_querypos++;
+ }
+ }
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength ||
+ Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == true);
+ assert(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == true);
+ assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
+ return Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
}
}
} else if (Stage3end_hittype(hit_high) == GMAP) {
- debug13(printf("High GMAP\n"));
high_pairarray = Stage3end_pairarray(hit_high);
high_npairs = Stage3end_npairs(hit_high);
if (plusp == true) {
- low_querypos = *hardclip_low;
- high_querypos = high_querylength - 1 - (*hardclip_high);
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
-
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (low_substring == NULL ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_substring != NULL && high_querypos < high_querylength &&
- (Substring_contains_p(low_substring,low_querypos-1) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false)) {
- /* Shift clip querypos upward (rightward on chromosome) */
- debug13(printf("Low %d..%d does not contain %d or high GMAP does not contain lower querypos %d => ",
- Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos-1,high_querypos-1));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_substring != NULL && high_querypos >= 0 &&
- (Substring_contains_p(low_substring,low_querypos+1) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false)) {
- /* Shift clip querypos downward (leftward on chromosome) */
- debug13(printf("Low %d..%d does not contain %d or high GMAP does not contain higher querypos %d => ",
- Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos+1,high_querypos+1));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- if (high_querypos < 0 || high_querypos >= high_querylength || low_substring == NULL) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
-
- } else {
- low_querypos = low_querylength - 1 - (*hardclip_low);
- high_querypos = *hardclip_high;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
-
- while (low_querypos >= 0 && high_querypos >= 0 &&
- (low_substring == NULL ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_substring != NULL && high_querypos >= 0 &&
- (Substring_contains_p(low_substring,low_querypos+1) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false)) {
- /* Shift clip querypos downward (rightward on chromosome) */
- debug13(printf("Low %d..%d does not contain %d or high GMAP does not contain higher querypos %d => ",
- Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos+1,high_querypos+1));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- while (low_substring != NULL && high_querypos < high_querylength &&
- (Substring_contains_p(low_substring,low_querypos-1) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false)) {
- /* Shift clip querypos upward (rightward on chromosome) */
- debug13(printf("Low %d..%d does not contain %d or high GMAP does not contain lower querypos %d => ",
- Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos-1,high_querypos-1));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- if (high_querypos < 0 || high_querypos >= high_querylength || low_substring == NULL) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ debug13(printf("High GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((high_querypos - 1) >= 0 && (low_querypos - 1) >= 0 &&
+ (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false ||
+ (low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
+ Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != Substring_genomicstart_adj(low_substring) + low_querypos - chroffset)) {
+ (*shift) += 1;
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ low_querypos--;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ high_querypos--;
+ } else {
+ low_chrpos = Substring_genomicstart_adj(low_substring) + low_querypos - chroffset;
+ high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
+ if (low_chrpos > high_chrpos) {
+ debug13(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos--;
+ } else if (high_chrpos > low_chrpos) {
+ debug13(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos--;
+ } else {
+ low_querypos--;
+ high_querypos--;
+ }
+ }
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((high_querypos - 1) < 0 || (low_querypos - 1) < 0 ||
+ Stage3end_substring_containing(hit_low,low_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_low,low_querypos-1) == low_substring);
+ assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == true);
+ return Pairarray_lookup(high_pairarray,high_npairs,high_querypos) + chroffset;
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ debug13(printf("High GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((high_querypos + 1) < high_querylength && (low_querypos + 1) < low_querylength &&
+ (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
+ Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false ||
+ (low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
+ Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset)) {
+ (*shift) += 1;
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ low_querypos++;
+ } else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
+ high_querypos++;
+ } else {
+ low_chrpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset;
+ high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
+ if (low_chrpos > high_chrpos) {
+ debug13(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos++;
+ } else if (high_chrpos > low_chrpos) {
+ debug13(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos++;
+ } else {
+ low_querypos++;
+ high_querypos++;
+ }
+ }
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((high_querypos + 1) >= high_querylength || (low_querypos + 1) >= low_querylength ||
+ Stage3end_substring_containing(hit_low,low_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_low,low_querypos-1) == low_substring);
+ assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == true);
+ assert(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == true);
+ return Pairarray_lookup(high_pairarray,high_npairs,high_querypos) + chroffset;
}
}
} else {
if (plusp == true) {
- debug13(printf("Both substrings, plus\n"));
-
- low_querypos = *hardclip_low;
- high_querypos = high_querylength - 1 - *hardclip_high;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
-
- debug13(printf("Shifting upward with low_substring %p and high_substring %p\n",low_substring,high_substring));
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (low_substring == NULL || high_substring == NULL)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- debug13(printf("Shifting upward with low_substring %p and high_substring %p\n",low_substring,high_substring));
- while (low_substring != NULL && high_substring != NULL &&
- (Substring_contains_p(low_substring,low_querypos-1) == false ||
- Substring_contains_p(high_substring,high_querypos-1) == false)) {
- /* Shift clip querypos upward (rightward on chromosome) */
- debug13(printf("Low %d..%d does not contain %d or high %d..%d does not contain lower querypos %d => ",
- Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos-1,
- Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos-1));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- debug13(printf("Shifting downward with low_substring %p and high_substring %p\n",low_substring,high_substring));
- while (low_substring != NULL && high_substring != NULL &&
- (Substring_contains_p(low_substring,low_querypos+1) == false ||
- Substring_contains_p(high_substring,high_querypos+1) == false)) {
- /* Shift clip querypos downward (leftward on chromosome) */
- debug13(printf("Low %d..%d does not contain %d or high %d..%d does not contain higher querypos %d => ",
- Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos+1,
- Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos+1));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- if (low_substring == NULL || high_substring == NULL) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
-
- } else {
- debug13(printf("Both substrings, minus\n"));
-
- low_querypos = low_querylength - 1 - (*hardclip_low);
- high_querypos = *hardclip_high;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
-
- debug13(printf("Shifting downward with low_substring %p and high_substring %p\n",low_substring,high_substring));
- while (low_querypos >= 0 && high_querypos >= 0 &&
- (low_substring == NULL || high_substring == NULL)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- debug13(printf("Shifting downward with low_substring %p and high_substring %p\n",low_substring,high_substring));
- while (low_substring != NULL && high_substring != NULL &&
- (Substring_contains_p(low_substring,low_querypos+1) == false ||
- Substring_contains_p(high_substring,high_querypos+1) == false)) {
- /* Shift clip querypos downward (rightward on chromosome) */
- debug13(printf("Low %d..%d does not contain %d or high %d..%d does not contain higher querypos %d => ",
- Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos+1,
- Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos+1));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- debug13(printf("Shifting upward with low_substring %p and high_substring %p\n",low_substring,high_substring));
- while (low_substring != NULL && high_substring != NULL &&
- (Substring_contains_p(low_substring,low_querypos-1) == false ||
- Substring_contains_p(high_substring,high_querypos-1) == false)) {
- /* Shift clip querypos upward (rightward on chromosome) */
- debug13(printf("Low %d..%d does not contain %d or high %d..%d does not contain lower querypos %d => ",
- Substring_querystart(low_substring),Substring_queryend(low_substring),low_querypos-1,
- Substring_querystart(high_substring),Substring_queryend(high_substring),high_querypos-1));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- debug13(printf("Trying hardclip_low %d and hardclip_high %d\n",*hardclip_low,*hardclip_high));
- }
-
- if (low_substring == NULL || high_substring == NULL) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
+ low_querypos = hardclip_low;
+ high_querypos = high_querylength - 1 - hardclip_high;
+ debug13(printf("Both substrings, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos--;
+ high_querypos--;
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
+ ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
+ Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
+ (high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
+ Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
+ Substring_genomicstart_adj(low_substring) + low_querypos - chroffset != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset)) {
+ (*shift) += 1;
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ low_querypos--;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ high_querypos--;
+ } else {
+ low_chrpos = Substring_genomicstart_adj(low_substring) + low_querypos - chroffset;
+ high_chrpos = Substring_genomicstart_adj(high_substring) + high_querypos - chroffset;
+ if (low_chrpos > high_chrpos) {
+ debug13(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos--;
+ } else if (high_chrpos > low_chrpos) {
+ debug13(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos--;
+ } else {
+ low_querypos--;
+ high_querypos--;
+ }
+ }
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0 ||
+ (low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ debug13(printf("Returning %u + %d\n",Substring_genomicstart_adj(low_substring) - chroffset,
+ low_querypos));
+ assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
+ assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_low,low_querypos-1) == low_substring);
+ assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
+ return Substring_genomicstart_adj(low_substring) + low_querypos; /* Want univcoord */
+ }
+
+ } else {
+ low_querypos = low_querylength - 1 - hardclip_low;
+ high_querypos = hardclip_high;
+ debug13(printf("Both substrings, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
+ low_querypos++;
+ high_querypos++;
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
+ ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
+ Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
+ (high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
+ Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
+ (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset)) {
+ (*shift) += 1;
+ if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
+ low_querypos++;
+ } else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
+ high_querypos++;
+ } else {
+ low_chrpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset;
+ high_chrpos = (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset;
+ if (low_chrpos > high_chrpos) {
+ debug13(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ low_querypos++;
+ } else if (high_chrpos > low_chrpos) {
+ debug13(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ high_querypos++;
+ } else {
+ low_querypos++;
+ high_querypos++;
+ }
+ }
+ debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ }
+
+ if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength ||
+ (low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
+ Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
+ *shift = 0;
+ return 0;
+ } else {
+ debug13(printf("Returning %u - %d\n",Substring_genomicstart_adj(low_substring) - chroffset,
+ low_querypos));
+ assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
+ assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
+ assert(Stage3end_substring_containing(hit_low,low_querypos-1) == low_substring);
+ assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
+ assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
+ return (Substring_genomicstart_adj(low_substring) - 1) - low_querypos; /* Want univcoord */
}
}
}
-
- debug13(printf("Exiting adjust_hardclips with hardclip_low %d, hardclip_high %d\n",
- *hardclip_low,*hardclip_high));
-
- return;
}
@@ -3048,7 +3844,8 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
int clipdir;
int ilength53, ilength35, ilength5_low, ilength5_high, ilength3_low, ilength3_high;
int common_shift, common_left, common_right;
- Univcoord_T common_genomicpos;
+ Univcoord_T common_genomicpos, common_genomicpos_right, common_genomicpos_left;
+ int shift_right, shift_left;
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
@@ -3087,9 +3884,13 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
debug13(printf("Cannot determine a common point, so returning 0\n"));
return 0;
+ } else if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
+ find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
+ debug13(printf("Cannot determine ilengths, so returning 0\n"));
+ return 0;
+
} else {
- find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset);
- find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset);
+ debug13(printf("Inclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
debug13(printf("ilength53 is %d, ilength 35 is %d\n",ilength5_low + ilength3_high - 1,ilength3_low + ilength5_high - 1));
common_left = (ilength5_low < ilength3_low) ? ilength5_low : ilength3_low;
@@ -3098,39 +3899,97 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
common_shift = common_right/2 - (common_left - 1)/2;
debug13(printf("Common shift is %d = common_right %d/2 - (common_left %d - 1)/2\n",
common_shift,common_right,common_left));
+ ilength5_low -= 1;
+ ilength3_low -= 1;
} else {
common_shift = (common_right - 1)/2 - common_left/2;
debug13(printf("Common shift is %d = (common_right %d - 1)/2 - common_left %d/2\n",
common_shift,common_right,common_left));
+ ilength5_high -= 1;
+ ilength3_high -= 1;
}
+ debug13(printf("Exclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
- if ((ilength53 = ilength5_low + ilength3_high - 1) >= (ilength35 = ilength3_low + ilength5_high - 1)) {
+
+ if ((ilength53 = ilength5_low + ilength3_high) >= (ilength35 = ilength3_low + ilength5_high)) {
/* Use >=, not >, so we favor clipping heads over clipping tails in case of a tie */
debug13(printf("plus, ilength53 is longer. Clipping heads.\n"));
- overlap = common_left + common_right - 1;
debug13(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
- overlap,common_left,common_right));
+ common_left+common_right-1,common_left,common_right));
clipdir = +1;
- if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
- /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
- this->insertlength = ilength53;
- }
-
/* Want to clip 5 high and 3 low */
*hardclip5_high = ilength5_high - common_shift;
- *hardclip3_low = overlap - (*hardclip5_high);
+ *hardclip3_low = ilength3_low + common_shift;
debug13(printf("Overlap clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
*hardclip5_high += hit5->trim_right + hit5->end_amb_length;
*hardclip3_low += hit3->trim_left + hit3->start_amb_length;
debug13(printf("Ambig clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- adjust_hardclips(&(*hardclip3_low),hit3,hit3->querylength_adj,
- &(*hardclip5_high),hit5,hit5->querylength_adj);
- debug13(printf("Adjusted clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
- *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ if (common_shift != 0) {
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == true) {
+ /* No adjustment needed, but need to recompute ilengths for shifted common_genomicpos */
+ } else {
+ common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset);
+ common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset);
+ debug13(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
+ if (shift_right == 0 && shift_left == 0) {
+ /* Try original position without a shift */
+ *hardclip5_high = ilength5_high /*- common_shift*/;
+ *hardclip3_low = ilength3_low /*+ common_shift*/;
+ *hardclip5_high += hit5->trim_right + hit5->end_amb_length;
+ *hardclip3_low += hit3->trim_left + hit3->start_amb_length;
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == false) {
+ *hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
+ return 0;
+ }
+ } else if (shift_left == 0) {
+ common_genomicpos = common_genomicpos_right;
+ } else if (shift_right == 0) {
+ common_genomicpos = common_genomicpos_left;
+ } else if (shift_right <= shift_left) {
+ common_genomicpos = common_genomicpos_right;
+ } else {
+ common_genomicpos = common_genomicpos_left;
+ }
+ }
+
+ debug13(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
+ /* Recompute hardclips */
+ if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
+ find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
+ *hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
+ return 0;
+ } else if (ilength3_low > ilength5_high) {
+ debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ ilength3_low -= 1;
+ } else {
+ debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ ilength5_high -= 1;
+ }
+ debug13(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+
+ *hardclip5_high = ilength5_high /*- common_shift*/;
+ *hardclip3_low = ilength3_low /*+ common_shift*/;
+ *hardclip5_high += hit5->trim_right + hit5->end_amb_length;
+ *hardclip3_low += hit3->trim_left + hit3->start_amb_length;
+
+ debug13(printf("Recomputed clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ }
+
+ if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
+ this->insertlength = ilength53;
+ }
+
+#if 0
if (*hardclip5_high < 0) {
*hardclip5_high = 0;
}
@@ -3139,33 +3998,85 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
}
debug13(printf("Positive clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+#endif
} else {
debug13(printf("plus, ilength35 is longer. Clipping tails.\n"));
- overlap = common_left + common_right - 1;
debug13(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
- overlap,common_left,common_right));
+ common_left+common_right-1,common_left,common_right));
clipdir = -1;
- if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
- /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
- this->insertlength = ilength35;
- }
-
/* Want to clip 5 low and 3 high */
*hardclip5_low = ilength5_low + common_shift;
- *hardclip3_high = overlap - (*hardclip5_low);
+ *hardclip3_high = ilength3_high - common_shift;
debug13(printf("Overlap clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
*hardclip5_low += hit5->trim_left + hit5->start_amb_length;
*hardclip3_high += hit3->trim_right + hit3->end_amb_length;
debug13(printf("Ambig clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- adjust_hardclips(&(*hardclip5_low),hit5,hit5->querylength_adj,
- &(*hardclip3_high),hit3,hit3->querylength_adj);
- debug13(printf("Adjusted clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
- *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ if (common_shift != 0) {
+ if (test_hardclips(&common_genomicpos,*hardclip5_low,hit5,hit5->querylength_adj,
+ *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset) == true) {
+ /* No adjustment needed, but need to recompute ilengths for shifted common_genomicpos */
+ } else {
+ common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip5_low,hit5,hit5->querylength_adj,
+ *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset);
+ common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip5_low,hit5,hit5->querylength_adj,
+ *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset);
+ debug13(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
+ if (shift_right == 0 && shift_left == 0) {
+ /* Try original position without a shift */
+ *hardclip5_low = ilength5_low /*+ common_shift*/;
+ *hardclip3_high = ilength3_high /*- common_shift*/;
+ *hardclip5_low += hit5->trim_left + hit5->start_amb_length;
+ *hardclip3_high += hit3->trim_right + hit3->end_amb_length;
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == false) {
+ *hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
+ return 0;
+ }
+ } else if (shift_left == 0) {
+ common_genomicpos = common_genomicpos_right;
+ } else if (shift_right == 0) {
+ common_genomicpos = common_genomicpos_left;
+ } else if (shift_right <= shift_left) {
+ common_genomicpos = common_genomicpos_right;
+ } else {
+ common_genomicpos = common_genomicpos_left;
+ }
+ }
+
+ debug13(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
+ /* Recompute hardclips */
+ if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
+ find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
+ *hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
+ return 0;
+ } else if (ilength5_low > ilength3_high) {
+ debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ ilength5_low -= 1;
+ } else {
+ debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ ilength3_high -= 1;
+ }
+ debug13(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+
+ *hardclip5_low = ilength5_low /*+ common_shift*/;
+ *hardclip3_high = ilength3_high /*- common_shift*/;
+ *hardclip5_low += hit5->trim_left + hit5->start_amb_length;
+ *hardclip3_high += hit3->trim_right + hit3->end_amb_length;
+ debug13(printf("Recomputed clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ }
+
+ if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
+ this->insertlength = ilength35;
+ }
+
+#if 0
if (*hardclip5_low < 0) {
*hardclip5_low = 0;
}
@@ -3174,6 +4085,7 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
}
debug13(printf("Positive clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+#endif
}
debug13(printf("returning clipdir %d\n",clipdir));
@@ -3198,9 +4110,13 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
debug13(printf("Cannot determine a common point, so returning 0\n"));
return 0;
+ } else if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
+ find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
+ debug13(printf("Cannot determine ilengths, so returning 0\n"));
+ return 0;
+
} else {
- find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset);
- find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset);
+ debug13(printf("Inclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
debug13(printf("ilength53lh is %d, ilength35lh is %d\n",ilength5_low + ilength3_high - 1,ilength3_low + ilength5_high - 1));
common_left = (ilength5_low < ilength3_low) ? ilength5_low : ilength3_low;
@@ -3209,13 +4125,18 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
common_shift = common_right/2 - (common_left - 1)/2;
debug13(printf("Common shift is %d = common_right %d/2 - (common_left %d - 1)/2\n",
common_shift,common_right,common_left));
+ ilength5_low -= 1;
+ ilength3_low -= 1;
} else {
common_shift = (common_right - 1)/2 - common_left/2;
debug13(printf("Common shift is %d = (common_right %d - 1)/2 - common_left %d/2\n",
common_shift,common_right,common_left));
+ ilength5_high -= 1;
+ ilength3_high -= 1;
}
+ debug13(printf("Exclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
- if ((ilength53 = ilength5_low + ilength3_high - 1) > (ilength35 = ilength3_low + ilength5_high - 1)) {
+ if ((ilength53 = ilength5_low + ilength3_high) > (ilength35 = ilength3_low + ilength5_high)) {
/* Use >, not >=, so we favor clipping heads over clipping tails in case of a tie */
debug13(printf("minus, ilength53 is longer. Clipping tails.\n"));
overlap = common_left + common_right - 1;
@@ -3223,24 +4144,77 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
overlap,common_left,common_right));
clipdir = +1;
- if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
- /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
- this->insertlength = ilength53;
- }
/* Want to clip 5 high and 3 low */
*hardclip5_high = ilength5_high - common_shift;
- *hardclip3_low = overlap - (*hardclip5_high);
+ *hardclip3_low = ilength3_low + common_shift;
debug13(printf("Overlap clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
*hardclip5_high += hit5->trim_left + hit5->start_amb_length;
*hardclip3_low += hit3->trim_right + hit3->end_amb_length;
debug13(printf("Ambig clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- adjust_hardclips(&(*hardclip3_low),hit3,hit3->querylength_adj,
- &(*hardclip5_high),hit5,hit5->querylength_adj);
- debug13(printf("Adjusted clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
- *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+
+ if (common_shift != 0) {
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == true) {
+ /* No adjustment needed, but need to recompute ilengths for shifted common_genomicpos */
+ } else {
+ common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset);
+ common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset);
+ debug13(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
+ if (shift_right == 0 && shift_left == 0) {
+ /* Try original position without a shift */
+ *hardclip5_high = ilength5_high /*- common_shift*/;
+ *hardclip3_low = ilength3_low /*+ common_shift*/;
+ *hardclip5_high += hit5->trim_left + hit5->start_amb_length;
+ *hardclip3_low += hit3->trim_right + hit3->end_amb_length;
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == false) {
+ *hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
+ return 0;
+ }
+ } else if (shift_left == 0) {
+ common_genomicpos = common_genomicpos_right;
+ } else if (shift_right == 0) {
+ common_genomicpos = common_genomicpos_left;
+ } else if (shift_right <= shift_left) {
+ common_genomicpos = common_genomicpos_right;
+ } else {
+ common_genomicpos = common_genomicpos_left;
+ }
+ }
+
+ debug13(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
+ /* Recompute hardclips */
+ if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
+ find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
+ *hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
+ return 0;
+ } else if (ilength3_low > ilength5_high) {
+ debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ ilength3_low -= 1;
+ } else {
+ debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ ilength5_high -= 1;
+ }
+ debug13(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+
+ *hardclip5_high = ilength5_high /*- common_shift*/;
+ *hardclip3_low = ilength3_low /*+ common_shift*/;
+ *hardclip5_high += hit5->trim_left + hit5->start_amb_length;
+ *hardclip3_low += hit3->trim_right + hit3->end_amb_length;
+ debug13(printf("Recomputed clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ }
+
+ if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
+ this->insertlength = ilength53;
+ }
+#if 0
if (*hardclip5_high < 0) {
*hardclip5_high = 0;
}
@@ -3249,6 +4223,7 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
}
debug13(printf("Positive clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+#endif
} else {
debug13(printf("minus, ilength35 is longer. Clipping heads.\n"));
@@ -3257,25 +4232,77 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
overlap,common_left,common_right));
clipdir = -1;
- if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
- /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
- this->insertlength = ilength35;
- }
-
- /* Want to clip 5 low and 3 high. Verified. */
+ /* Want to clip 5 low and 3 high */
*hardclip5_low = ilength5_low + common_shift;
- *hardclip3_high = overlap - (*hardclip5_low);
+ *hardclip3_high = ilength3_high - common_shift;
debug13(printf("Overlap clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
*hardclip5_low += hit5->trim_right + hit5->end_amb_length;
*hardclip3_high += hit3->trim_left + hit3->start_amb_length;
debug13(printf("Ambig clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- adjust_hardclips(&(*hardclip5_low),hit5,hit5->querylength_adj,
- &(*hardclip3_high),hit3,hit3->querylength_adj);
- debug13(printf("Adjusted clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
- *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ if (common_shift != 0) {
+ if (test_hardclips(&common_genomicpos,*hardclip5_low,hit5,hit5->querylength_adj,
+ *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset) == true) {
+ /* No adjustment needed, but need to recompute ilengths for shifted common_genomicpos */
+ } else {
+ common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip5_low,hit5,hit5->querylength_adj,
+ *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset);
+ common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip5_low,hit5,hit5->querylength_adj,
+ *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset);
+ debug13(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
+ if (shift_right == 0 && shift_left == 0) {
+ /* Try original position without a shift */
+ *hardclip5_low = ilength5_low /*+ common_shift*/;
+ *hardclip3_high = ilength3_high /*- common_shift*/;
+ *hardclip5_low += hit5->trim_right + hit5->end_amb_length;
+ *hardclip3_high += hit3->trim_left + hit3->start_amb_length;
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
+ *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == false) {
+ *hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
+ return 0;
+ }
+ } else if (shift_left == 0) {
+ common_genomicpos = common_genomicpos_right;
+ } else if (shift_right == 0) {
+ common_genomicpos = common_genomicpos_left;
+ } else if (shift_right <= shift_left) {
+ common_genomicpos = common_genomicpos_right;
+ } else {
+ common_genomicpos = common_genomicpos_left;
+ }
+ }
+
+ debug13(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
+ /* Recompute hardclips */
+ if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
+ find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
+ *hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
+ return 0;
+ } else if (ilength5_low > ilength3_high) {
+ debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ ilength5_low -= 1;
+ } else {
+ debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ ilength3_high -= 1;
+ }
+ debug13(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+
+ *hardclip5_low = ilength5_low /*+ common_shift*/;
+ *hardclip3_high = ilength3_high /*- common_shift*/;
+ *hardclip5_low += hit5->trim_right + hit5->end_amb_length;
+ *hardclip3_high += hit3->trim_left + hit3->start_amb_length;
+ debug13(printf("Recomputed clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+ }
+
+ if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+ /* Revise only for paired-ends involving GMAP and when successful. Observed to be the correct action. */
+ this->insertlength = ilength35;
+ }
+
+#if 0
if (*hardclip5_low < 0) {
*hardclip5_low = 0;
}
@@ -3284,6 +4311,7 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
}
debug13(printf("Positive clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
+#endif
}
}
@@ -3801,9 +4829,9 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
}
}
- if ((substring = Substring_new(/*nmismatches*/0,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- /*start_endtype*/END,/*end_endtype*/END,
+ if ((substring = Substring_new(/*nmismatches*/0,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ query_compress,/*start_endtype*/END,/*end_endtype*/END,
/*querystart*/0,/*queryend*/genomiclength,/*querylength*/genomiclength,
/*alignstart*/genomicstart,/*alignend*/genomicend,
genomiclength,/*extraleft*/0,/*extraright*/0,/*exactp*/true,
@@ -3938,9 +4966,9 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
}
}
- if ((substring = Substring_new(nmismatches_whole,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- /*start_endtype*/END,/*end_endtype*/END,
+ if ((substring = Substring_new(nmismatches_whole,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ query_compress,/*start_endtype*/END,/*end_endtype*/END,
/*querystart*/0,/*queryend*/genomiclength,/*querylength*/genomiclength,
/*alignstart*/genomicstart,/*alignend*/genomicend,
genomiclength,/*extraleft*/0,/*extraright*/0,/*exactp*/false,
@@ -4078,6 +5106,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
Substring_T substring1, substring2;
int querystart1, queryend1, querystart2, queryend2;
Univcoord_T genomicstart, genomicend;
+ Univcoord_T genomicstart_adj_2, genomicend_adj_2;
Univcoord_T alignstart1, alignend1, alignstart2, alignend2;
debug2(printf("Entered with left %llu, querylength %d, genomiclength %d, indel_pos %d\n",
@@ -4095,40 +5124,48 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
queryend2 = querylength;
if (plusp == true) {
- if ((genomicend = left + genomiclength) > chrhigh) {
- return (T) NULL;
- } else {
- genomicstart = left;
+ genomicstart = left;
+ genomicend = left + genomiclength;
+
+ genomicstart_adj_2 = genomicstart - nindels;
+ genomicend_adj_2 = genomicend - nindels;
+
+ alignstart1 = genomicstart;
+ alignend1 = alignstart2 = genomicstart + indel_pos;
+ alignend2 = genomicend/* - nindels*/;
- alignstart1 = genomicstart;
- alignend1 = alignstart2 = genomicstart + indel_pos;
- alignend2 = genomicend/* - nindels*/;
+ if (genomicend > chrhigh) {
+ return (T) NULL;
}
} else {
- if ((genomicstart = left + genomiclength) > chrhigh) {
- return (T) NULL;
- } else {
- genomicend = left;
+ genomicend = left;
+ genomicstart = left + genomiclength;
+
+ genomicstart_adj_2 = genomicstart + nindels;
+ genomicend_adj_2 = genomicend + nindels;
+
+ alignstart1 = genomicstart;
+ alignend1 = alignstart2 = genomicstart - indel_pos;
+ alignend2 = genomicend/* + nindels*/;
- alignstart1 = genomicstart;
- alignend1 = alignstart2 = genomicstart - indel_pos;
- alignend2 = genomicend/* + nindels*/;
+ if (genomicstart > chrhigh) {
+ return (T) NULL;
}
}
- if ((substring1 = Substring_new(nmismatches1_whole,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- /*start_endtype*/END,/*end_endtype*/INS,
+ if ((substring1 = Substring_new(nmismatches1_whole,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ query_compress,/*start_endtype*/END,/*end_endtype*/INS,
querystart1,queryend1,querylength,alignstart1,alignend1,genomiclength,
/*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
/*trim_left_p (previously was end1_indel_p ? false : true)*/true,
/*trim_right_p*/false,/*minlength*/0)) == NULL) {
return (T) NULL;
- } else if ((substring2 = Substring_new(nmismatches2_whole,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- /*start_endtype*/INS,/*end_endtype*/END,
+ } else if ((substring2 = Substring_new(nmismatches2_whole,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,genomicstart_adj_2,genomicend_adj_2,
+ query_compress,/*start_endtype*/INS,/*end_endtype*/END,
querystart2,queryend2,querylength,alignstart2,alignend2,genomiclength,
/*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
/*trim_left_p*/false,
@@ -4275,8 +5312,8 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
Substring_T substring1, substring2;
int querystart1, queryend1, querystart2, queryend2;
Univcoord_T genomicstart, genomicend;
+ Univcoord_T genomicstart_adj_2, genomicend_adj_2;
Univcoord_T alignstart1, alignend1, alignstart2, alignend2;
- Univcoord_T left2;
debug3(printf("Entered with left %llu, querylength %d, genomiclength %d, indel_pos %d\n",
(unsigned long long) left,querylength,genomiclength,indel_pos));
@@ -4293,59 +5330,60 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
queryend2 = querylength;
if (plusp == true) {
- if ((genomicend = left + genomiclength) > chrhigh) {
- return (T) NULL;
- } else {
- genomicstart = left;
+ genomicstart = left;
+ genomicend = left + genomiclength;
- alignstart1 = genomicstart;
- alignend1 = genomicstart + indel_pos;
- alignstart2 = alignend1 + nindels;
- alignend2 = genomicend/* + nindels*/;
+ genomicstart_adj_2 = genomicstart + nindels;
+ genomicend_adj_2 = genomicend + nindels;
- /* left1 = left; */
- left2 = left + nindels;
+ alignstart1 = genomicstart;
+ alignend1 = genomicstart + indel_pos;
+ alignstart2 = alignend1 + nindels;
+ alignend2 = genomicend/* + nindels*/;
- debug3(printf("plusp is true. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu, left2 %llu\n",
- (unsigned long long) genomicstart,(unsigned long long) genomicend,
- (unsigned long long) alignstart1,(unsigned long long) alignend1,(unsigned long long) alignstart2,
- (unsigned long long) alignend2,(unsigned long long) left,(unsigned long long) left2));
+ debug3(printf("plusp is true. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu, left2 %llu\n",
+ (unsigned long long) genomicstart,(unsigned long long) genomicend,
+ (unsigned long long) alignstart1,(unsigned long long) alignend1,(unsigned long long) alignstart2,
+ (unsigned long long) alignend2,(unsigned long long) left,(unsigned long long) left2));
+
+ if (genomicend > chrhigh) {
+ return (T) NULL;
}
} else {
- if ((genomicstart = left + genomiclength) > chrhigh) {
- return (T) NULL;
- } else {
- genomicend = left;
+ genomicend = left;
+ genomicstart = left + genomiclength;
+
+ genomicstart_adj_2 = genomicstart - nindels;
+ genomicend_adj_2 = genomicend - nindels;
- alignstart1 = genomicstart;
- alignend1 = genomicstart - indel_pos;
- alignstart2 = alignend1 - nindels;
- alignend2 = genomicend/* - nindels*/;
+ alignstart1 = genomicstart;
+ alignend1 = genomicstart - indel_pos;
+ alignstart2 = alignend1 - nindels;
+ alignend2 = genomicend/* - nindels*/;
- /* left1 = left; */
- left2 = left + nindels;
+ debug3(printf("plusp is false. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu, left2 %llu\n",
+ (unsigned long long) genomicstart,(unsigned long long) genomicend,
+ (unsigned long long) alignstart1,(unsigned long long) alignend1,(unsigned long long) alignstart2,
+ (unsigned long long) alignend2,(unsigned long long) left,(unsigned long long) left2));
- debug3(printf("plusp is false. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu, left2 %llu\n",
- (unsigned long long) genomicstart,(unsigned long long) genomicend,
- (unsigned long long) alignstart1,(unsigned long long) alignend1,(unsigned long long) alignstart2,
- (unsigned long long) alignend2,(unsigned long long) left,(unsigned long long) left2));
+ if (genomicstart > chrhigh) {
+ return (T) NULL;
}
}
-
- if ((substring1 = Substring_new(nmismatches1_whole,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- /*start_endtype*/END,/*end_endtype*/DEL,
+ if ((substring1 = Substring_new(nmismatches1_whole,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ query_compress,/*start_endtype*/END,/*end_endtype*/DEL,
querystart1,queryend1,querylength,alignstart1,alignend1,genomiclength,
/*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
/*trim_left_p (previously was end1_indel_p ? false : true)*/true,
/*trim_right_p*/false,/*minlength*/0)) == NULL) {
return (T) NULL;
- } else if ((substring2 = Substring_new(nmismatches2_whole,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- /*start_endtype*/DEL,/*end_endtype*/END,
+ } else if ((substring2 = Substring_new(nmismatches2_whole,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,genomicstart_adj_2,genomicend_adj_2,
+ query_compress,/*start_endtype*/DEL,/*end_endtype*/END,
querystart2,queryend2,querylength,alignstart2,alignend2,genomiclength,
/*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
/*trim_left_p*/false,
@@ -4383,10 +5421,10 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
new->deletion = (char *) NULL;
new->indel_pos = indel_pos;
if (plusp == true) {
- new->substring_LtoH = List_push(List_push(NULL,new->substring1),new->substring2);
+ new->substring_LtoH = List_push(List_push(NULL,new->substring2),new->substring1);
new->indel_low = indel_pos;
} else {
- new->substring_LtoH = List_push(List_push(NULL,new->substring2),new->substring1);
+ new->substring_LtoH = List_push(List_push(NULL,new->substring1),new->substring2);
new->indel_low = querylength - indel_pos;
}
#endif
@@ -5468,9 +6506,9 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
minlength = TERMINAL_COMPUTE_MINLENGTH;
}
- if ((substring = Substring_new(/*nmismatches_whole*/0,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- start_endtype,end_endtype,querystart,queryend,querylength,
+ if ((substring = Substring_new(/*nmismatches_whole*/0,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
alignstart,alignend,/*genomiclength*/querylength,
/*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
trim_left_p,trim_right_p,minlength)) == NULL) {
@@ -9601,37 +10639,90 @@ Stage3pair_print (Result_T result, Resulttype_T resulttype,
static List_T
+strip_gaps_at_head (List_T pairs) {
+ Pair_T pair;
+
+ while (pairs != NULL && (pair = pairs->first) != NULL &&
+ (pair->gapp == true || pair->cdna == ' ' || pair->genome == ' ')) {
+ pairs = List_pop_out(pairs,(void **) &pair);
+ Pair_free_out(&pair);
+ }
+
+ return pairs;
+}
+
+static List_T
+strip_gaps_at_tail (List_T pairs) {
+ Pair_T pair;
+
+ if (pairs != NULL) {
+ pairs = List_reverse(pairs);
+
+ while (pairs != NULL && (pair = pairs->first) != NULL &&
+ (pair->gapp == true || pair->cdna == ' ' || pair->genome == ' ')) {
+ pairs = List_pop_out(pairs,(void **) &pair);
+ Pair_free_out(&pair);
+ }
+
+ pairs = List_reverse(pairs);
+ }
+
+ return pairs;
+}
+
+
+static List_T
Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
int clipdir, int hardclip_low, int hardclip_high,
bool first_read_p, int queryseq_offset) {
+ Pair_T pair;
+ List_T newpairs = NULL, p;
+ Chrpos_T genomicpos1, genomicpos2;
if (hit->hittype == EXACT || hit->hittype == SUB || hit->hittype == TERMINAL) {
- return Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ debug13(printf("Converting exact/sub\n"));
+ return Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
} else if (hit->hittype == INSERTION) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ debug13(printf("Converting insertion\n"));
+
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
pairs = Substring_add_insertion(pairs,hit->substring1,hit->substring2,/*insertionlength*/hit->nindels,queryseq,
- clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ hardclip_low,hardclip_high,queryseq_offset);
+ if (hit->plusp == true) {
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ } else {
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ }
return pairs;
} else if (hit->hittype == DELETION) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ debug13(printf("Converting deletion\n"));
+
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
pairs = Substring_add_deletion(pairs,hit->substring1,hit->substring2,/*deletion*/hit->deletion,/*deletionlength*/hit->nindels,
- clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ hardclip_low,hardclip_high,queryseq_offset);
+ if (hit->plusp == true) {
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ } else {
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ }
return pairs;
} else if (hit->hittype == HALFSPLICE_DONOR) {
- return Substring_convert_to_pairs(pairs,hit->substring_donor,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ debug13(printf("Converting halfsplice_donor\n"));
+ return Substring_convert_to_pairs(pairs,hit->substring_donor,queryseq,hardclip_low,hardclip_high,queryseq_offset);
} else if (hit->hittype == HALFSPLICE_ACCEPTOR) {
- return Substring_convert_to_pairs(pairs,hit->substring_acceptor,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ debug13(printf("Converting halfsplice_acceptor\n"));
+ return Substring_convert_to_pairs(pairs,hit->substring_acceptor,queryseq,hardclip_low,hardclip_high,queryseq_offset);
} else if (hit->hittype == SPLICE || hit->hittype == SAMECHR_SPLICE) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ debug13(printf("Converting splice\n"));
+
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
return pairs;
} else if (hit->hittype == TRANSLOC_SPLICE) {
@@ -9640,19 +10731,23 @@ Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
return NULL;
} else if (hit->hittype == ONE_THIRD_SHORTEXON) {
- return Substring_convert_to_pairs(pairs,/*shortexon*/hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ debug13(printf("Converting 1/3 shortexon\n"));
+
+ return Substring_convert_to_pairs(pairs,/*shortexon*/hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
} else if (hit->hittype == TWO_THIRDS_SHORTEXON) {
+ debug13(printf("Converting 2/3 shortexon\n"));
+
if (hit->substring0 == NULL) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
return pairs;
} else if (hit->substring2 == NULL) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
return pairs;
} else {
@@ -9660,14 +10755,16 @@ Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
}
} else if (hit->hittype == SHORTEXON) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
+ debug13(printf("Converting shortexon\n"));
+ pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,hardclip_low,hardclip_high,queryseq_offset);
+ pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
return pairs;
} else if (hit->hittype == GMAP) {
+ debug13(printf("Converting gmap\n"));
return Pair_convert_array_to_pairs(pairs,hit->pairarray,hit->npairs,hit->plusp,hit->querylength,
clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
@@ -9685,10 +10782,13 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high) {
struct Pair_T *pairarray, *newpair;
Pair_T oldpair;
- List_T pairs = NULL, p;
+ List_T pairs, pairs5, pairs3, p;
T hit5, hit3;
int querylengthA, querylengthB;
char *queryseq_ptr_5, *queryseq_ptr_3, *quality_ptr_5, *quality_ptr_3;
+#ifdef CHECK_ASSERTIONS
+ Chrpos_T genomicpos1, genomicpos2;
+#endif
hit5 = this->hit5;
hit3 = this->hit3;
@@ -9698,12 +10798,28 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
quality_ptr_3 = Shortread_quality_string(queryseq3);
if (hit5->plusp == true) {
- if (clipdir >= 0) {
- pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,
- /*first_read_p*/true,/*queryseq_offset*/0);
- pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,
- /*first_read_p*/false,
- /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
+ if (clipdir > 0) {
+ pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,/*first_read_p*/true,/*queryseq_offset*/0);
+ pairs5 = strip_gaps_at_head(pairs5);
+
+ pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,/*first_read_p*/false,
+ /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
+ pairs3 = strip_gaps_at_tail(pairs3);
+
+#ifdef CHECK_ASSERTIONS
+ genomicpos1 = ((Pair_T) List_head(pairs5))->genomepos;
+ genomicpos2 = ((Pair_T) List_last_value(pairs3))->genomepos;
+ if (genomicpos2 != genomicpos1 + 1U) {
+ printf("Accession %s, plus\n",Shortread_accession(queryseq5));
+ printf("Expected genomicpos2 %u == genomicpos1 %u + 1\n",genomicpos2,genomicpos1);
+ Pair_dump_list(pairs5,true);
+ Pair_dump_list(pairs3,true);
+ abort();
+ }
+#endif
+
+ pairs = List_append(pairs3,pairs5);
+
querylengthA = querylength5 - hardclip5_low - hardclip5_high;
querylengthB = querylength3 - hardclip3_low - hardclip3_high;
*querylength_merged = querylengthA + querylengthB;
@@ -9720,12 +10836,28 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
strncpy(&((*quality_merged)[querylengthA]),&(quality_ptr_3[querylength3 - querylengthB]),querylengthB);
}
- } else {
- pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,
- /*first_read_p*/false,/*queryseq_offset*/0);
- pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,
- /*first_read_p*/true,
- /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
+ } else if (clipdir < 0) {
+ pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,/*first_read_p*/false,/*queryseq_offset*/0);
+ pairs3 = strip_gaps_at_head(pairs3);
+
+ pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,/*first_read_p*/true,
+ /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
+ pairs5 = strip_gaps_at_tail(pairs5);
+
+#ifdef CHECK_ASSERTIONS
+ genomicpos1 = ((Pair_T) List_head(pairs3))->genomepos;
+ genomicpos2 = ((Pair_T) List_last_value(pairs5))->genomepos;
+ if (genomicpos2 != genomicpos1 + 1U) {
+ printf("Accession %s, plus\n",Shortread_accession(queryseq5));
+ printf("Expected genomicpos2 %u == genomicpos1 %u + 1\n",genomicpos2,genomicpos1);
+ Pair_dump_list(pairs3,true);
+ Pair_dump_list(pairs5,true);
+ abort();
+ }
+#endif
+
+ pairs = List_append(pairs5,pairs3);
+
querylengthA = querylength3 - hardclip3_low - hardclip3_high;
querylengthB = querylength5 - hardclip5_low - hardclip5_high;
*querylength_merged = querylengthA + querylengthB;
@@ -9741,15 +10873,34 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
strncpy(*quality_merged,quality_ptr_3,querylengthA);
strncpy(&((*quality_merged)[querylengthA]),&(quality_ptr_5[querylength5 - querylengthB]),querylengthB);
}
+
+ } else {
+ abort();
}
} else {
- if (clipdir >= 0) {
- pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,
- /*first_read_p*/false,/*queryseq_offset*/0);
- pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,
- /*first_read_p*/true,
+ if (clipdir > 0) {
+ pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,/*first_read_p*/false,/*queryseq_offset*/0);
+ pairs3 = strip_gaps_at_head(pairs3);
+
+ pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,/*first_read_p*/true,
/*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
+ pairs5 = strip_gaps_at_tail(pairs5);
+
+#ifdef CHECK_ASSERTIONS
+ genomicpos1 = ((Pair_T) List_head(pairs3))->genomepos;
+ genomicpos2 = ((Pair_T) List_last_value(pairs5))->genomepos;
+ if (genomicpos2 != genomicpos1 - 1U) {
+ printf("Accession %s, minus\n",Shortread_accession(queryseq5));
+ printf("Expected genomicpos2 %u == genomicpos1 %u - 1\n",genomicpos2,genomicpos1);
+ Pair_dump_list(pairs3,true);
+ Pair_dump_list(pairs5,true);
+ abort();
+ }
+#endif
+
+ pairs = List_append(pairs5,pairs3);
+
querylengthA = querylength3 - hardclip3_low - hardclip3_high;
querylengthB = querylength5 - hardclip5_low - hardclip5_high;
*querylength_merged = querylengthA + querylengthB;
@@ -9766,12 +10917,28 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
strncpy(&((*quality_merged)[querylengthA]),&(quality_ptr_5[querylength5 - querylengthB]),querylengthB);
}
- } else {
- pairs = Stage3end_convert_to_pairs(pairs,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,
- /*first_read_p*/true,/*queryseq_offset*/0);
- pairs = Stage3end_convert_to_pairs(pairs,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,
- /*first_read_p*/false,
- /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
+ } else if (clipdir < 0) {
+ pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,/*first_read_p*/true,/*queryseq_offset*/0);
+ pairs5 = strip_gaps_at_head(pairs5);
+
+ pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,/*first_read_p*/false,
+ /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
+ pairs3 = strip_gaps_at_tail(pairs3);
+
+#ifdef CHECK_ASSERTIONS
+ genomicpos1 = ((Pair_T) List_head(pairs5))->genomepos;
+ genomicpos2 = ((Pair_T) List_last_value(pairs3))->genomepos;
+ if (genomicpos2 != genomicpos1 - 1U) {
+ printf("Accession %s, minus\n",Shortread_accession(queryseq5));
+ printf("Expected genomicpos2 %u == genomicpos1 %u - 1\n",genomicpos2,genomicpos1);
+ Pair_dump_list(pairs5,true);
+ Pair_dump_list(pairs3,true);
+ abort();
+ }
+#endif
+
+ pairs = List_append(pairs3,pairs5);
+
querylengthA = querylength5 - hardclip5_low - hardclip5_high;
querylengthB = querylength3 - hardclip3_low - hardclip3_high;
*querylength_merged = querylengthA + querylengthB;
@@ -9787,6 +10954,9 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
strncpy(*quality_merged,quality_ptr_5,querylengthA);
strncpy(&((*quality_merged)[querylengthA]),&(quality_ptr_3[querylength3 - querylengthB]),querylengthB);
}
+
+ } else {
+ abort();
}
}
diff --git a/src/substring.c b/src/substring.c
index 2e5f828..e794c0c 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 161183 2015-03-18 17:04:33Z twu $";
+static char rcsid[] = "$Id: substring.c 161665 2015-03-23 00:03:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -241,6 +241,13 @@ struct T {
Univcoord_T genomicstart; /* For region corresponding to entire querylength (if extrapolated) */
Univcoord_T genomicend;
+ Univcoord_T genomicstart_adj; /* Needed by --clip-overlap and
+ --merge-overlap when +/- querypos.
+ Differs only for indels */
+ Univcoord_T genomicend_adj; /* Needed by --clip-overlap and
+ --merge-overlap when +/- querypos.
+ Differs only for indels */
+
Endtype_T start_endtype;
Endtype_T end_endtype;
@@ -318,6 +325,8 @@ Substring_alias_circular (T this) {
this->left += chrlength;
this->genomicstart += chrlength;
this->genomicend += chrlength;
+ this->genomicstart_adj += chrlength;
+ this->genomicend_adj += chrlength;
this->alignstart += chrlength;
this->alignend += chrlength;
this->alignstart_trim += chrlength;
@@ -341,6 +350,8 @@ Substring_unalias_circular (T this) {
this->left -= chrlength;
this->genomicstart -= chrlength;
this->genomicend -= chrlength;
+ this->genomicstart_adj -= chrlength;
+ this->genomicend_adj -= chrlength;
this->alignstart -= chrlength;
this->alignend -= chrlength;
this->alignstart_trim -= chrlength;
@@ -1130,11 +1141,11 @@ Chrpos_T
Substring_insert_length (T substring5, T substring3) {
Univcoord_T pos5, pos3;
- pos5 = substring5->genomicstart;
- debug3(printf("pos5 %u\n",substring5->genomicstart));
+ pos5 = substring5->genomicstart_adj;
+ debug3(printf("pos5 %u\n",substring5->genomicstart_adj));
- pos3 = substring3->genomicend;
- debug3(printf("pos3 %u\n",substring3->genomicend));
+ pos3 = substring3->genomicend_adj;
+ debug3(printf("pos3 %u\n",substring3->genomicend_adj));
if (pos5 > pos3) {
return pos5 - pos3;
@@ -1597,6 +1608,7 @@ T
Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
Univcoord_T chrhigh, Chrpos_T chrlength, Univcoord_T left,
Univcoord_T genomicstart, Univcoord_T genomicend,
+ Univcoord_T genomicstart_adj, Univcoord_T genomicend_adj,
Compress_T query_compress, Endtype_T start_endtype, Endtype_T end_endtype,
int querystart, int queryend, int querylength,
Univcoord_T alignstart, Univcoord_T alignend, int genomiclength,
@@ -1630,6 +1642,8 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->left_genomicseg = left;
new->genomicstart = genomicstart;
new->genomicend = genomicend;
+ new->genomicstart_adj = genomicstart_adj;
+ new->genomicend_adj = genomicend_adj;
new->start_endtype = start_endtype;
new->end_endtype = end_endtype;
@@ -2287,6 +2301,11 @@ Substring_genomicstart (T this) {
}
Univcoord_T
+Substring_genomicstart_adj (T this) {
+ return this->genomicstart_adj;
+}
+
+Univcoord_T
Substring_genomicend (T this) {
return this->genomicend;
}
@@ -2431,6 +2450,8 @@ Substring_copy (T old) {
new->left = old->left;
new->genomicstart = old->genomicstart;
new->genomicend = old->genomicend;
+ new->genomicstart_adj = old->genomicstart_adj;
+ new->genomicend_adj = old->genomicend_adj;
new->start_endtype = old->start_endtype;
new->end_endtype = old->end_endtype;
@@ -2576,9 +2597,9 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
}
}
- if ((new = Substring_new(donor_nmismatches,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- start_endtype,end_endtype,querystart,queryend,querylength,
+ if ((new = Substring_new(donor_nmismatches,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
alignstart,alignend,/*genomiclength*/querylength,
extraleft,extraright,/*exactp*/false,plusp,genestrand,first_read_p,
trim_left_p,trim_right_p,/*minlength*/0)) == NULL) {
@@ -2692,9 +2713,9 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
}
}
- if ((new = Substring_new(acceptor_nmismatches,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- start_endtype,end_endtype,querystart,queryend,querylength,
+ if ((new = Substring_new(acceptor_nmismatches,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
alignstart,alignend,/*genomiclength*/querylength,
extraleft,extraright,/*exactp*/false,plusp,genestrand,first_read_p,
trim_left_p,trim_right_p,/*minlength*/0)) == NULL) {
@@ -2781,9 +2802,9 @@ Substring_new_shortexon (Univcoord_T acceptor_coord, int acceptor_knowni, Univco
}
}
- if ((new = Substring_new(nmismatches,chrnum,chroffset,chrhigh,chrlength,
- left,genomicstart,genomicend,query_compress,
- start_endtype,end_endtype,querystart,queryend,querylength,
+ if ((new = Substring_new(nmismatches,chrnum,chroffset,chrhigh,chrlength,left,
+ genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
alignstart,alignend,/*genomiclength*/querylength,
/*extraleft*/2,/*extraright*/2,/*exactp*/false,plusp,genestrand,first_read_p,
/*trim_left_p*/false,/*trim_right_p*/false,
@@ -4268,8 +4289,7 @@ Substring_count_mismatches_region (T this, int trim_left, int trim_right,
List_T
Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
- int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, int queryseq_offset) {
+ int hardclip_low, int hardclip_high, int queryseq_offset) {
int querystart, queryend, querypos, i;
Chrpos_T chrpos;
char *seq1;
@@ -4279,7 +4299,6 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
return pairs;
}
-
seq1 = Shortread_fullpointer_uc(queryseq);
if (substring->plusp == true) {
if (hardclip_low > substring->querystart) {
@@ -4293,11 +4312,14 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
} else {
queryend = substring->queryend;
}
+ /* Pairs are all zero-based, so do not add 1 */
+ chrpos = substring->genomicstart_adj + querystart - substring->chroffset /*+ 1U*/;
+
debug6(printf("querystart %d, queryend %d, plusp %d\n",querystart,queryend,substring->plusp));
debug6(printf("alignstart %u, alignend %u\n",substring->alignstart_trim - substring->chroffset,
substring->alignend_trim - substring->chroffset));
+ debug6(printf("chrpos %u\n",chrpos));
- chrpos = substring->genomicstart + querystart - substring->chroffset + 1U;
if (substring->genomic_bothdiff == NULL) {
/* Exact match */
for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++) {
@@ -4338,11 +4360,14 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
} else {
queryend = substring->queryend;
}
+ /* For minus, to get 0-based coordinates, subtract 1 */
+ chrpos = substring->genomicstart_adj - querystart - substring->chroffset - 1U;
+
debug6(printf("querystart %d, queryend %d, plusp %d\n",querystart,queryend,substring->plusp));
debug6(printf("alignstart %u, alignend %u\n",substring->alignstart_trim - substring->chroffset,
substring->alignend_trim - substring->chroffset));
+ debug6(printf("chrpos %u\n",chrpos));
- chrpos = substring->genomicstart - querystart - substring->chroffset;
if (substring->genomic_bothdiff == NULL) {
/* Exact match */
for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++) {
@@ -4378,7 +4403,7 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
List_T
Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertionlength, Shortread_T queryseq,
- int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset) {
+ int hardclip_low, int hardclip_high, int queryseq_offset) {
int querystartA, queryendA, querystartB, queryendB, querypos, i;
Chrpos_T chrendA;
char *seq1;
@@ -4409,7 +4434,8 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertion
queryendB = substringB->queryend;
}
- chrendA = substringA->genomicstart + queryendA - substringA->chroffset + 1U;
+ /* Pairs are all zero-based, so do not add 1 */
+ chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/;
} else {
if (hardclip_high > substringA->querystart) {
@@ -4436,12 +4462,13 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertion
queryendB = substringB->queryend;
}
- chrendA = substringA->genomicstart - queryendA - substringA->chroffset;
+ /* Pairs are all zero-based, so subtract 1 */
+ chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U;
}
if (querystartA <= queryendA && querystartB <= queryendB) {
seq1 = Shortread_fullpointer_uc(queryseq);
- querypos = queryseq_offset + queryendA;
+ querypos = queryendA + queryseq_offset;
i = queryendA;
while (--insertionlength >= 0) {
pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrendA,
@@ -4455,8 +4482,8 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertion
List_T
Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion, int deletionlength,
- int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset) {
- int querystartA, queryendA, querystartB, queryendB, k;
+ int hardclip_low, int hardclip_high, int queryseq_offset) {
+ int querystartA, queryendA, querystartB, queryendB, querypos, k;
Chrpos_T chrendA;
if (substringA->plusp == true) {
@@ -4484,12 +4511,13 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion
queryendB = substringB->queryend;
}
- chrendA = substringA->genomicstart + queryendA - substringA->chroffset + 1U;
+ /* Pairs are all zero-based, so do not add 1 */
+ chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/;
if (querystartA < queryendA && querystartB < queryendB) {
- queryendA += queryseq_offset;
+ querypos = queryendA + queryseq_offset;
for (k = 0; k < deletionlength; k++) {
- pairs = List_push_out(pairs,(void *) Pair_new_out(queryendA,/*genomepos*/chrendA++,
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrendA++,
' ',/*comp*/INDEL_COMP,deletion[k]));
}
}
@@ -4519,12 +4547,13 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion
queryendB = substringB->queryend;
}
- chrendA = substringA->genomicstart - queryendA - substringA->chroffset;
+ /* Pairs are all zero-based, so subtract 1 */
+ chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U;
if (querystartA <= queryendA && querystartB <= queryendB) {
- queryendA += queryseq_offset;
+ querypos = queryendA + queryseq_offset;
for (k = 0; k < deletionlength; k++) {
- pairs = List_push_out(pairs,(void *) Pair_new_out(queryendA,/*genomepos*/chrendA--,
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrendA--,
' ',/*comp*/INDEL_COMP,deletion[k]));
}
}
@@ -4537,9 +4566,8 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion
List_T
Substring_add_intron (List_T pairs, T substringA, T substringB,
- int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, int queryseq_offset) {
- int querystartA, queryendA, querystartB, queryendB;
+ int hardclip_low, int hardclip_high, int queryseq_offset) {
+ int querystartA, queryendA, querystartB, queryendB, querypos;
Chrpos_T chrendA;
if (substringA->plusp == true) {
@@ -4567,7 +4595,8 @@ Substring_add_intron (List_T pairs, T substringA, T substringB,
queryendB = substringB->queryend;
}
- chrendA = substringA->genomicstart + queryendA - substringA->chroffset + 1U;
+ /* Pairs are all zero-based, so do not add 1 */
+ chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/;
} else {
if (hardclip_high > substringA->querystart) {
@@ -4594,13 +4623,15 @@ Substring_add_intron (List_T pairs, T substringA, T substringB,
queryendB = substringB->queryend;
}
- chrendA = substringA->genomicstart - queryendA - substringA->chroffset;
+ /* Pairs are all zero-based, so subtract 1 */
+ chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U;
}
if (querystartA <= queryendA && querystartB <= queryendB) {
/* Add gapholder */
/* All we really need for Pair_print_sam is to set gapp to be true */
- pairs = List_push_out(pairs,(void *) Pair_new_out(queryseq_offset + queryendA,/*genomepos*/chrendA,
+ querypos = queryendA + queryseq_offset;
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrendA,
' ',/*comp*/FWD_CANONICAL_INTRON_COMP,' '));
}
diff --git a/src/substring.h b/src/substring.h
index 38651e3..51e69f9 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 161183 2015-03-18 17:04:33Z twu $ */
+/* $Id: substring.h 161653 2015-03-22 16:10:53Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -40,8 +40,9 @@ Substring_unalias_circular (T this);
extern T
Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
- Univcoord_T chrhigh, Chrpos_T chrlength,
- Univcoord_T left, Univcoord_T genomicstart, Univcoord_T genomicend,
+ Univcoord_T chrhigh, Chrpos_T chrlength, Univcoord_T left,
+ Univcoord_T genomicstart, Univcoord_T genomicend,
+ Univcoord_T genomicstart_adj, Univcoord_T genomicend_adj,
Compress_T query_compress, Endtype_T start_endtype, Endtype_T end_endtype,
int querystart, int queryend, int querylength,
Univcoord_T alignstart, Univcoord_T alignend, int genomiclength,
@@ -164,6 +165,8 @@ Substring_left_genomicseg (T this);
extern Univcoord_T
Substring_genomicstart (T this);
extern Univcoord_T
+Substring_genomicstart_adj (T this);
+extern Univcoord_T
Substring_genomicend (T this);
extern Chrpos_T
Substring_genomiclength (T this);
@@ -294,16 +297,16 @@ Substring_count_mismatches_region (T this, int trim_left, int trim_right,
extern List_T
Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
- int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
+ int hardclip_low, int hardclip_high, int queryseq_offset);
extern List_T
Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertionlength, Shortread_T queryseq,
- int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
+ int hardclip_low, int hardclip_high, int queryseq_offset);
extern List_T
Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion, int deletionlength,
- int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
+ int hardclip_low, int hardclip_high, int queryseq_offset);
extern List_T
Substring_add_intron (List_T pairs, T substringA, T substringB,
- int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
+ int hardclip_low, int hardclip_high, int queryseq_offset);
#undef T
#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list