[med-svn] [gmap] 01/04: New upstream version 2017-11-15
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Thu Dec 21 16:11:45 UTC 2017
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 3e968d2ee29a6e79037cc23cf34a802ed412f022
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Thu Dec 21 14:49:39 2017 +0100
New upstream version 2017-11-15
---
ChangeLog | 17 ++++++
VERSION | 2 +-
configure | 24 ++++----
src/dynprog_end.c | 21 +++++--
src/outbuffer.c | 8 ++-
src/stage1hr.c | 6 +-
src/stage3.c | 178 ++++++++++++++++++++++++++++++++----------------------
7 files changed, 162 insertions(+), 94 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 43227ec..4e99c79 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2017-11-16 twu
+
+ * VERSION, public-2017-09-05, src: Updated version number
+
+ * stage3.c: Merged revision 211227 from trunk to fix issue with trimming of
+ chimeras not being re-extended back to the breakpoint, and to fix issue
+ with CIGAR strings from shortgaps comps being treated differently from
+ indel comps.
+
+ * stage1hr.c: Appending greedy results to subs
+
+ * dynprog_end.c: Merged revision 211220 from trunk to handle the case where
+ rev_goffset is negative
+
+ * outbuffer.c: Merged revision 211219 from trunk to fix fatal bug when
+ trying to write SAM headers to OUTPUT_NONE split output, which is now NULL
+
2017-10-30 twu
* stage3.c: Merged revision 210882 from trunk to fix trim_end_indel
diff --git a/VERSION b/VERSION
index 75cebdc..535447a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2017-10-30
\ No newline at end of file
+2017-11-15
\ No newline at end of file
diff --git a/configure b/configure
index ad2a907..941ec1c 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2017-10-30.
+# Generated by GNU Autoconf 2.69 for gmap 2017-11-15.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2017-10-30'
-PACKAGE_STRING='gmap 2017-10-30'
+PACKAGE_VERSION='2017-11-15'
+PACKAGE_STRING='gmap 2017-11-15'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
PACKAGE_URL=''
@@ -1369,7 +1369,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2017-10-30 to adapt to many kinds of systems.
+\`configure' configures gmap 2017-11-15 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1440,7 +1440,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2017-10-30:";;
+ short | recursive ) echo "Configuration of gmap 2017-11-15:";;
esac
cat <<\_ACEOF
@@ -1577,7 +1577,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2017-10-30
+gmap configure 2017-11-15
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2183,7 +2183,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2017-10-30, which was
+It was created by gmap $as_me 2017-11-15, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2533,8 +2533,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-10-30" >&5
-$as_echo "2017-10-30" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-11-15" >&5
+$as_echo "2017-11-15" >&6; }
### Read defaults
@@ -4401,7 +4401,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2017-10-30'
+ VERSION='2017-11-15'
cat >>confdefs.h <<_ACEOF
@@ -19978,7 +19978,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2017-10-30, which was
+This file was extended by gmap $as_me 2017-11-15, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20044,7 +20044,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-gmap config.status 2017-10-30
+gmap config.status 2017-11-15
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/src/dynprog_end.c b/src/dynprog_end.c
index bf1740f..ed034e4 100644
--- a/src/dynprog_end.c
+++ b/src/dynprog_end.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_end.c 205259 2017-04-12 23:55:04Z twu $";
+static char rcsid[] = "$Id: dynprog_end.c 211225 2017-11-16 03:47:12Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1299,7 +1299,7 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
debug6(
printf("%c: ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A');
- printf("Aligning 5' end gap with endalign %d\n",endalign);
+ printf("Aligning 5' end gap with endalign = %d, roffset %d, goffset %d\n",endalign,roffset,goffset);
);
mismatchtype = ENDQ;
@@ -1327,7 +1327,13 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
debug6(printf("rlength %d is too long. Chopping to %d\n",rlength,dynprog->max_rlength));
rlength = dynprog->max_rlength;
}
- if (glength <= 0) {
+ if (rev_goffset < 0) {
+ /* No room to extend more 5' */
+ debug6(printf("rev_goffset %d < 0, so returning NULL\n",rev_goffset));
+ *nmatches = *nmismatches = *nopens = *nindels = 0;
+ *finalscore = 0;
+ return (List_T) NULL;
+ } else if (glength <= 0) {
/* Needed to avoid abort by Matrix16_alloc */
debug6(printf("glength %d <= 0, so returning NULL\n",glength));
*nmatches = *nmismatches = *nopens = *nindels = 0;
@@ -1342,6 +1348,10 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
debug6(printf("At query offset %d-%d, %.*s\n",rev_roffset-rlength+1,rev_roffset,rlength,&(rev_rsequence[-rlength+1])));
+ /* Situation is handled above */
+ /* assert(rev_goffset >= 0); */
+
+
#ifdef EXTRACT_GENOMICSEG
debug6(printf("At genomic offset %d-%d, %.*s\n",
rev_goffset-glength+1,rev_goffset,glength,&(rev_gsequence[-glength+1])));
@@ -1899,7 +1909,7 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
debug6(
printf("%c: ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A');
- printf("Aligning 3' end gap with endalign = %d\n",endalign);
+ printf("Aligning 3' end gap with endalign = %d, roffset %d, goffset %d\n",endalign,roffset,goffset);
);
mismatchtype = ENDQ;
@@ -1942,7 +1952,8 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
#ifdef EXTRACT_GENOMICSEG
debug6(printf("At genomic offset %d-%d, %.*s\n",goffset,goffset+glength-1,glength,gsequence));
#endif
-
+ /* Need to assert here, because if rlength is 0, then goffset doesn't matter */
+ assert(goffset >= 0);
gsequence = (char *) MALLOCA((glength+1) * sizeof(char));
gsequence_alt = (char *) MALLOCA((glength+1) * sizeof(char));
diff --git a/src/outbuffer.c b/src/outbuffer.c
index ddc2d14..2839fc4 100644
--- a/src/outbuffer.c
+++ b/src/outbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: outbuffer.c 210517 2017-10-12 22:41:05Z twu $";
+static char rcsid[] = "$Id: outbuffer.c 211224 2017-11-16 03:46:32Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -147,6 +147,12 @@ print_file_headers (
FILE *output
#endif
) {
+
+ if (output == NULL) {
+ /* Possible since we are no longer creating a file for OUTPUT_NONE */
+ return;
+ }
+
#ifdef GSNAP
if (output_sam_p == true && sam_headers_p == true) {
SAM_header_print_HD(output,nworkers,orderedp);
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 9768290..adaef9c 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 209609 2017-09-01 21:57:26Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 211226 2017-11-16 03:49:07Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -21713,8 +21713,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("After pairing one mismatch, found %d concordant, %d samechr, %d terminals, found_score %d\n",
nconcordant,nsamechr,List_length(*terminals),*found_score));
if (*abort_pairing_p == true) {
- *hits5 = subs5;
- *hits3 = subs3;
+ *hits5 = List_append(greedy5,subs5); /* Was subs5 */
+ *hits3 = List_append(greedy3,subs3); /* Was subs3 */
hitpairs = Stage3pair_remove_circular_alias(hitpairs);
#if 0
hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
diff --git a/src/stage3.c b/src/stage3.c
index 3aac474..64010f7 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 210883 2017-10-30 21:42:56Z twu $";
+static char rcsid[] = "$Id: stage3.c 211229 2017-11-16 03:58:47Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -189,13 +189,6 @@ static const Except_T coordinate_error = {"Coordinate error"};
#define debug1(x)
#endif
-/* Chimeras */
-#ifdef DEBUG2
-#define debug2(x) x
-#else
-#define debug2(x)
-#endif
-
/* path_trim */
#ifdef DEBUG3
#define debug3(x) x
@@ -512,9 +505,9 @@ Stage3_straintype (T this) {
int
Stage3_goodness (T this) {
- debug2(printf("Overall goodness:\n"));
- debug2(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => goodness %d\n",
- this->matches,this->mismatches,this->qopens,this->qindels,this->topens,this->tindels,this->goodness));
+ debug(printf("Overall goodness:\n"));
+ debug(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => goodness %d\n",
+ this->matches,this->mismatches,this->qopens,this->qindels,this->topens,this->tindels,this->goodness));
return this->goodness;
}
@@ -738,23 +731,23 @@ Stage3_chimeric_goodness (int *matches1, int *matches2, T part1, T part2, int br
ncanonical2, nsemicanonical2, nnoncanonical2;
querystart = Pair_querypos(&(part1->pairarray[0]));
- debug2(printf("Chimeric goodness requested for part %d..%d\n",querystart+1,breakpoint));
+ debug10(printf("Chimeric goodness requested for part %d..%d\n",querystart+1,breakpoint));
Pair_fracidentity_bounded(&(*matches1),&unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
&ncanonical1,&nsemicanonical1,&nnoncanonical1,
part1->pairarray,part1->npairs,part1->cdna_direction,
querystart,breakpoint);
goodness1 = (*matches1) + MISMATCH*mismatches1 + QOPEN*qopens1 + QINDEL*qindels1 + TOPEN*topens1 + TINDEL*tindels1;
- debug2(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n",
+ debug10(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n",
*matches1,mismatches1,qopens1,qindels1,topens1,tindels1,goodness1));
queryend = Pair_querypos(&(part2->pairarray[part2->npairs-1]));
- debug2(printf("Chimeric goodness requested for part %d..%d\n",breakpoint+1,queryend+1));
+ debug10(printf("Chimeric goodness requested for part %d..%d\n",breakpoint+1,queryend+1));
Pair_fracidentity_bounded(&(*matches2),&unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
&ncanonical2,&nsemicanonical2,&nnoncanonical2,
part2->pairarray,part2->npairs,part2->cdna_direction,
breakpoint,queryend);
goodness2 = (*matches2) + MISMATCH*mismatches2 + QOPEN*qopens2 + QINDEL*qindels2 + TOPEN*topens2 + TINDEL*tindels2;
- debug2(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n",
+ debug10(printf(" %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n",
*matches2,mismatches2,qopens2,qindels2,topens2,tindels2,goodness2));
return goodness1 + goodness2;
@@ -3743,7 +3736,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length,
}
exon = (List_T) NULL;
- while (pairs != NULL && pair->gapp == false && pair->comp != INDEL_COMP) {
+ while (pairs != NULL && pair->gapp == false && pair->comp != INDEL_COMP && pair->comp != SHORTGAP_COMP) {
pairptr = pairs;
pairs = Pairpool_pop(pairs,&pair);
#ifdef WASTE
@@ -3753,7 +3746,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length,
#endif
}
- while (pairs != NULL && pair->gapp == false && ((Pair_T) pairs->first)->comp == INDEL_COMP) {
+ while (pairs != NULL && pair->gapp == false && (((Pair_T) pairs->first)->comp == INDEL_COMP || ((Pair_T) pairs->first)->comp == SHORTGAP_COMP)) {
pairptr = pairs;
pairs = Pairpool_pop(pairs,&pair);
#ifdef WASTE
@@ -3773,7 +3766,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length,
} else {
p = exon;
nindels = 1;
- while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) {
+ while (p != NULL && (((Pair_T) p->first)->comp == INDEL_COMP || ((Pair_T) p->first)->comp == SHORTGAP_COMP)) {
p = List_next(p);
nindels++;
}
@@ -3850,6 +3843,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length,
chroffset,chrhigh,watsonp,jump_late_p,pairpool,
extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true);
debug(printf("CONTINUOUS AT 5 (trim_end5_indels)?\n"));
+ debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n"));
debug(Pair_dump_list(continuous_gappairs_medialgap,true));
debug3(printf("continuous finalscore %d\n",finalscore));
@@ -3893,7 +3887,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs,
List_T path, exon, pairptr, p;
Pair_T pair, splice = NULL, gappair;
int max_nmatches = 0, max_nmismatches;
- int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
+ int nmatches = 0, nmismatches /* = -1 because of the gap */;
int max_score, score;
/* bool nearindelp = false; */
double medial_prob;
@@ -3930,7 +3924,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs,
}
exon = (List_T) NULL;
- while (pairs != NULL && !pair->gapp /*&& pair->comp != INDEL_COMP*/) {
+ while (pairs != NULL && !pair->gapp /*&& pair->comp != INDEL_COMP && pair->comp != SHORTGAP_COMP */) {
pairptr = pairs;
pairs = Pairpool_pop(pairs,&pair);
#ifdef WASTE
@@ -4085,6 +4079,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs,
chroffset,chrhigh,watsonp,jump_late_p,pairpool,
extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true);
debug(printf("CONTINUOUS AT 5 (trim_end5_exons)?\n"));
+ debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n"));
debug(Pair_dump_list(continuous_gappairs_medialgap,true));
debug3(printf("continuous finalscore %d\n",finalscore));
@@ -4151,7 +4146,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
List_T pairs, exon, pairptr, p;
Pair_T pair, medial;
int max_nmatches = 0, max_nmismatches;
- int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
+ int nmatches = 0, nmismatches /* = -1 because of the gap */;
int max_score, score;
bool nearindelp = false;
int nindels;
@@ -4177,7 +4172,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
}
exon = (List_T) NULL;
- while (path != NULL && pair->gapp == false && pair->comp != INDEL_COMP) {
+ while (path != NULL && pair->gapp == false && pair->comp != INDEL_COMP && pair->comp != SHORTGAP_COMP) {
pairptr = path;
path = Pairpool_pop(path,&pair);
#ifdef WASTE
@@ -4187,7 +4182,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
#endif
}
- while (path != NULL && pair->gapp == false && ((Pair_T) path->first)->comp == INDEL_COMP) {
+ while (path != NULL && pair->gapp == false && (((Pair_T) path->first)->comp == INDEL_COMP || ((Pair_T) path->first)->comp == SHORTGAP_COMP)) {
pairptr = path;
path = Pairpool_pop(path,&pair);
#ifdef WASTE
@@ -4207,7 +4202,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
} else {
p = exon;
nindels = 1;
- while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) {
+ while (p != NULL && (((Pair_T) p->first)->comp == INDEL_COMP || ((Pair_T) p->first)->comp == SHORTGAP_COMP)) {
p = List_next(p);
nindels++;
}
@@ -4285,6 +4280,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
chroffset,chrhigh,watsonp,jump_late_p,pairpool,
extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true);
debug(printf("CONTINUOUS AT 3 (trim_end3_indels)?\n"));
+ debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n"));
debug(Pair_dump_list(continuous_gappairs_medialgap,true));
debug3(printf("continuous finalscore %d\n",finalscore));
@@ -4519,6 +4515,7 @@ trim_end3_exons (bool *indelp, bool *trim3p, int ambig_end_length, List_T path,
chroffset,chrhigh,watsonp,jump_late_p,pairpool,
extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true);
debug(printf("CONTINUOUS AT 3 (trim_end3_exons)?\n"));
+ debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n"));
debug(Pair_dump_list(continuous_gappairs_medialgap,true));
debug3(printf("continuous finalscore %d\n",finalscore));
@@ -9688,8 +9685,9 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p, Pairpool_T pairpool,
- Dynprog_T dynprog, int maxpeelback, double defect_rate, Endalign_T endalign) {
- List_T continuous_gappairs_distalgap = NULL, peeled_pairs;
+ Dynprog_T dynprog, int maxpeelback, double defect_rate, Endalign_T endalign,
+ bool forcep) {
+ List_T continuous_gappairs_distalgap = NULL, peeled_pairs = NULL;
int queryjump, genomejump;
int querydp5, querydp3_distalgap;
Chrpos_T genomedp3_distalgap;
@@ -9758,7 +9756,8 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
continuous_gappairs_distalgap = Dynprog_end5_gap(&(*dynprogindex_minor),&(*finalscore),
&nmatches,&nmismatches,&nopens,&nindels,dynprog,
&(queryseq_ptr[querydp3_distalgap]),&(queryuc_ptr[querydp3_distalgap]),
- queryjump,genomejump,querydp3_distalgap,genomedp3_distalgap,
+ /*rlength*/queryjump,/*glength*/genomejump,
+ /*rev_roffset*/querydp3_distalgap,/*rev_goffset*/genomedp3_distalgap,
chroffset,chrhigh,watsonp,jump_late_p,pairpool,
extraband_end,defect_rate,endalign,/*require_pos_score_p*/false);
*ambig_end_length = 0;
@@ -9768,7 +9767,7 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
debug(printf(" finalscore: %d\n",*finalscore));
if (continuous_gappairs_distalgap == NULL) {
- return (List_T) NULL;
+ return peeled_pairs;
} else {
firstpair = List_head(continuous_gappairs_distalgap);
if (0 && firstpair->querypos != querydp3_distalgap) {
@@ -9776,11 +9775,14 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
/* Must have an indel between the gappairs and the rest of the read */
debug(printf("Detected indel between gappairs %d and the rest of the read %d\n",
firstpair->querypos,querydp3_distalgap));
- return (List_T) NULL;
+ return peeled_pairs;
+ } else if (forcep == true) {
+ /* For example, needed for extending middles of chimeras */
+ return continuous_gappairs_distalgap;
} else if (*finalscore <= 0) {
*knownsplicep = false;
- return (List_T) NULL;
+ return peeled_pairs;
} else {
return continuous_gappairs_distalgap;
}
@@ -9910,8 +9912,8 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p,
Pairpool_T pairpool, Dynprog_T dynprog, int maxpeelback,
- double defect_rate, Endalign_T endalign) {
- List_T continuous_gappairs_distalgap = NULL, peeled_path;
+ double defect_rate, Endalign_T endalign, bool forcep) {
+ List_T continuous_gappairs_distalgap = NULL, peeled_path = NULL;
int queryjump, genomejump;
int querydp5_distalgap, querydp3;
Chrpos_T genomedp5_distalgap;
@@ -9986,7 +9988,7 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
debug(printf(" finalscore: %d\n",*finalscore));
if (continuous_gappairs_distalgap == NULL) {
- return (List_T) NULL;
+ return peeled_path;
} else {
continuous_gappairs_distalgap = List_reverse(continuous_gappairs_distalgap);
firstpair = List_head(continuous_gappairs_distalgap);
@@ -9995,11 +9997,14 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
/* Must have an indel between the gappairs and the rest of the read */
debug(printf("Detected indel between gappairs %d and the rest of the read %d\n",
firstpair->querypos,querydp5_distalgap));
- return (List_T) NULL;
+ return peeled_path;
+ } else if (forcep == true) {
+ /* For example, needed for extending middle of chimeras */
+ return continuous_gappairs_distalgap;
} else if (*finalscore <= 0) {
*knownsplicep = false;
- return (List_T) NULL;
+ return peeled_path;
} else {
return continuous_gappairs_distalgap;
}
@@ -10852,26 +10857,30 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
chrend = (chrhigh - chroffset) - genomedp5;
}
- assert(chrend >= chrstart);
-
- debug14(printf("Starting stage2 with chrstart %u, chrend %u, watsonp %d\n",
- chrstart,chrend,watsonp));
- gappairs = Stage2_compute_one(
+ /* chrend can be equal to chrstart - 1 if peelback fails on both ends */
+ assert(chrend + 1 >= chrstart);
+ if (chrend < chrstart) {
+ gappairs = (List_T) NULL;
+ } else {
+ debug14(printf("Starting stage2 with chrstart %u, chrend %u, watsonp %d\n",
+ chrstart,chrend,watsonp));
+ gappairs = Stage2_compute_one(
#ifdef PMAP
- &(queryaaseq_ptr[querydp5]),&(queryaaseq_ptr[querydp5]),
- /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5*3,
+ &(queryaaseq_ptr[querydp5]),&(queryaaseq_ptr[querydp5]),
+ /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5*3,
#else
- &(queryseq_ptr[querydp5]),&(queryuc_ptr[querydp5]),
- /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5,
+ &(queryseq_ptr[querydp5]),&(queryuc_ptr[querydp5]),
+ /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5,
#endif
- chrstart,chrend,chroffset,chrhigh,/*plusp*/watsonp,genestrand,
+ chrstart,chrend,chroffset,chrhigh,/*plusp*/watsonp,genestrand,
- oligoindices_minor,pairpool,diagpool,cellpool,
- /*localp should be false*/true,/*skip_repetitive_p*/false,
- /*favor_right_p*/false,/*debug_graphic_p*/false);
-
- debug14(printf("Internal stage2 result:\n"));
- debug14(Pair_dump_list(gappairs,true));
+ oligoindices_minor,pairpool,diagpool,cellpool,
+ /*localp should be false*/true,/*skip_repetitive_p*/false,
+ /*favor_right_p*/false,/*debug_graphic_p*/false);
+
+ debug14(printf("Internal stage2 result:\n"));
+ debug14(Pair_dump_list(gappairs,true));
+ }
if (gappairs == NULL) {
pairs = Pairpool_transfer(pairs,peeled_pairs);
@@ -11028,7 +11037,7 @@ build_path_end3 (bool *knownsplicep, int *ambig_end_length_3, Splicetype_T *ambi
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p, int maxpeelback,
double defect_rate, Pairpool_T pairpool, Dynprog_T dynprogL,
- bool extendp, Endalign_T endalign) {
+ bool extendp, Endalign_T endalign, bool forcep) {
List_T gappairs;
Pair_T leftpair;
/* int genomejump */
@@ -11093,7 +11102,7 @@ build_path_end3 (bool *knownsplicep, int *ambig_end_length_3, Splicetype_T *ambi
chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,pairpool,dynprogL,maxpeelback,
- defect_rate,endalign);
+ defect_rate,endalign,forcep);
} else {
/* Looks like we aren't calling this anymore */
abort();
@@ -11139,7 +11148,7 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p, int maxpeelback,
double defect_rate, Pairpool_T pairpool, Dynprog_T dynprogR,
- bool extendp, Endalign_T endalign) {
+ bool extendp, Endalign_T endalign, bool forcep) {
List_T gappairs;
Pair_T rightpair;
int queryjump, leftquerypos;
@@ -11200,7 +11209,7 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb
chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,pairpool,dynprogR,
- maxpeelback,defect_rate,endalign);
+ maxpeelback,defect_rate,endalign,forcep);
} else {
/* Looks like we aren't calling this anymore */
abort();
@@ -13364,8 +13373,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogR,
- /*extendp*/true,/*endalign*/QUERYEND_GAP);
-
+ /*extendp*/true,/*endalign*/QUERYEND_GAP,/*forcep*/false);
/* Necessary to insert gaps and assign gap types (fills in cDNA
@@ -13412,7 +13420,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogR,/*extendp*/true,
- /*endalign*/BEST_LOCAL);
+ /*endalign*/BEST_LOCAL,/*forcep*/false);
debug3(printf("AFTER 5' REBUILD\n"));
debug3(Pair_dump_list(pairs,true));
}
@@ -13432,7 +13440,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogR,/*extendp*/true,
- /*endalign*/QUERYEND_INDELS);
+ /*endalign*/QUERYEND_INDELS,/*forcep*/false);
#endif
}
@@ -13446,7 +13454,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogR,
- /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+ /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
debug(Pair_dump_list(pairs,true));
debug(printf("End of path_compute_end5\n"));
@@ -13514,7 +13522,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogL,
- /*extendp*/true,/*endalign*/QUERYEND_GAP);
+ /*extendp*/true,/*endalign*/QUERYEND_GAP,/*forcep*/false);
/* Necessary to insert gaps and assign gap types (fills in cDNA
insertions, so they don't get trimmed), in case an insertion was
@@ -13561,7 +13569,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogL,/*extendp*/true,
- /*endalign*/BEST_LOCAL);
+ /*endalign*/BEST_LOCAL,/*forcep*/false);
debug3(printf("AFTER 3' REBUILD\n"));
debug3(Pair_dump_list(path,true));
}
@@ -13580,7 +13588,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogL,/*extendp*/true,
- /*endalign*/QUERYEND_NOGAPS);
+ /*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
#endif
}
@@ -13594,7 +13602,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogL,
- /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+ /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
debug(Pair_dump_list(path,true));
debug(printf("End of path_compute_end3\n"));
@@ -15024,7 +15032,7 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
queryseq_ptr,queryuc_ptr,
*cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogR,
- /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+ /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
pairs = trim_end5_exons(&indelp,&trim5p,*ambig_end_length_5,pairs,dynprogR,chroffset,chrhigh,
queryseq_ptr,queryuc_ptr,querylength,*cdna_direction,watsonp,
jump_late_p,pairpool,defect_rate);
@@ -15049,7 +15057,7 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
queryseq_ptr,queryuc_ptr,
*cdna_direction,watsonp,jump_late_p,
maxpeelback,defect_rate,pairpool,dynprogL,
- /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+ /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
path = trim_end3_exons(&indelp,&trim3p,*ambig_end_length_3,path,dynprogL,chroffset,chrhigh,
queryseq_ptr,queryuc_ptr,querylength,
*cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
@@ -15954,13 +15962,31 @@ Stage3_merge_chimera (T this_left, T this_right,
Chrpos_T genomedp5, genomedp3;
bool protectedp;
+ debug10(printf("Entered stage3_merge_chimera with minpos1 %d, maxpos1 %d, minpos2 %d, maxpos2 %d\n",
+ minpos1,maxpos1,minpos2,maxpos2));
orig_left_pairs = Pairpool_copy(this_left->pairs,pairpool);
orig_right_pairs = Pairpool_copy(this_right->pairs,pairpool);
+#ifdef DEBUG10
+ printf("ORIG LEFT PAIRS:\n");
+ Pair_dump_list(orig_left_pairs,true);
+ printf("ORIG RIGHT PAIRS:\n");
+ Pair_dump_list(orig_right_pairs,true);
+#endif
+
+
this_left->pairs = Pair_clip_bounded_list(this_left->pairs,minpos1,maxpos1);
this_right->pairs = Pair_clip_bounded_list(this_right->pairs,minpos2,maxpos2);
+#ifdef DEBUG10
+ printf("CLIPPED LEFT PAIRS:\n");
+ Pair_dump_list(this_left->pairs,true);
+ printf("CLIPPED RIGHT PAIRS:\n");
+ Pair_dump_list(this_right->pairs,true);
+#endif
+
+
if (this_left->pairs == NULL && this_right->pairs == NULL) {
this_left->pairs = orig_left_pairs;
this_right->pairs = orig_right_pairs;
@@ -15983,15 +16009,17 @@ Stage3_merge_chimera (T this_left, T this_right,
} else {
path = List_reverse(this_left->pairs);
- /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then clip*/
+ /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then re-clip*/
endpair = (Pair_T) path->first;
querydp5 = endpair->querypos + 1;
genomedp5 = endpair->genomepos + 1;
protectedp = false;
path = peel_leftward(&n_peeled_indels,&protectedp,&peeled_path,path,&querydp5,&genomedp5,
- maxpeelback,/*stop_at_indels_p*/false);
+ maxpeelback,/*stop_at_indels_p*/true);
path = clean_path_end3_gap_indels(path);
+ /* Have to use forcep == true, because we cannot put back the
+ pairs removed from peel_leftward and clean_path_end3_gap_indels */
path = build_path_end3(&knownsplicep,&ambig_end_length_3,&ambig_splicetype,&ambig_prob_3,
&chop_exon_p,&dynprogindex_minor,path,
this_left->chroffset,this_left->chrhigh,/*querylength*/maxpos1+1,
@@ -16000,12 +16028,14 @@ Stage3_merge_chimera (T this_left, T this_right,
this_left->cdna_direction,this_left->watsonp,
/*jump_late_p*/this_left->watsonp ? false : true,
maxpeelback,/*defect_rate*/0.0,pairpool,dynprogL,
- /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+ /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/true);
this_left->pairs = List_reverse(path);
this_left->pairs = Pair_clip_bounded_list(this_left->pairs,minpos1,maxpos1);
+ debug10(printf("CLEANED LEFT PAIRS:\n"));
+ debug10(Pair_dump_list(this_left->pairs,true));
- /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then clip*/
+ /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then re-clip*/
pairs = this_right->pairs;
endpair = (Pair_T) pairs->first;
@@ -16013,9 +16043,11 @@ Stage3_merge_chimera (T this_left, T this_right,
genomedp3 = endpair->genomepos - 1;
protectedp = false;
pairs = peel_rightward(&n_peeled_indels,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
- maxpeelback,/*stop_at_indels_p*/false);
+ maxpeelback,/*stop_at_indels_p*/true);
pairs = clean_pairs_end5_gap_indels(pairs);
+ /* Have to use forcep == true, because we cannot put back the
+ pairs removed from peel_rightward and clean_pairs_end5_gap_indels */
pairs = build_pairs_end5(&knownsplicep,&ambig_end_length_5,&ambig_splicetype,&ambig_prob_5,
&chop_exon_p,&dynprogindex_minor,pairs,
this_right->chroffset,this_right->chrhigh,
@@ -16024,8 +16056,10 @@ Stage3_merge_chimera (T this_left, T this_right,
this_right->cdna_direction,this_right->watsonp,
/*jump_late_p*/this_right->watsonp ? false : true,
maxpeelback,/*defect_rate*/0.0,pairpool,dynprogR,
- /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+ /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/true);
this_right->pairs = Pair_clip_bounded_list(pairs,minpos2,maxpos2);
+ debug10(printf("CLEANED RIGHT PAIRS:\n"));
+ debug10(Pair_dump_list(this_right->pairs,true));
if (this_left->pairs == NULL || this_right->pairs == NULL) {
this_left->pairs = orig_left_pairs;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list