[med-svn] [gmap] 01/04: New upstream version 2017-11-15

Alex Mestiashvili malex-guest at moszumanska.debian.org
Thu Dec 21 16:11:45 UTC 2017


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository gmap.

commit 3e968d2ee29a6e79037cc23cf34a802ed412f022
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Thu Dec 21 14:49:39 2017 +0100

    New upstream version 2017-11-15
---
 ChangeLog         |  17 ++++++
 VERSION           |   2 +-
 configure         |  24 ++++----
 src/dynprog_end.c |  21 +++++--
 src/outbuffer.c   |   8 ++-
 src/stage1hr.c    |   6 +-
 src/stage3.c      | 178 ++++++++++++++++++++++++++++++++----------------------
 7 files changed, 162 insertions(+), 94 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 43227ec..4e99c79 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2017-11-16  twu
+
+    * VERSION, public-2017-09-05, src: Updated version number
+
+    * stage3.c: Merged revision 211227 from trunk to fix issue with trimming of
+      chimeras not being re-extended back to the breakpoint, and to fix issue
+      with CIGAR strings from shortgaps comps being treated differently from
+      indel comps.
+
+    * stage1hr.c: Appending greedy results to subs
+
+    * dynprog_end.c: Merged revision 211220 from trunk to handle the case where
+      rev_goffset is negative
+
+    * outbuffer.c: Merged revision 211219 from trunk to fix fatal bug when
+      trying to write SAM headers to OUTPUT_NONE split output, which is now NULL
+
 2017-10-30  twu
 
     * stage3.c: Merged revision 210882 from trunk to fix trim_end_indel
diff --git a/VERSION b/VERSION
index 75cebdc..535447a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2017-10-30
\ No newline at end of file
+2017-11-15
\ No newline at end of file
diff --git a/configure b/configure
index ad2a907..941ec1c 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2017-10-30.
+# Generated by GNU Autoconf 2.69 for gmap 2017-11-15.
 #
 # Report bugs to <Thomas Wu <twu at gene.com>>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='gmap'
 PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2017-10-30'
-PACKAGE_STRING='gmap 2017-10-30'
+PACKAGE_VERSION='2017-11-15'
+PACKAGE_STRING='gmap 2017-11-15'
 PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
 PACKAGE_URL=''
 
@@ -1369,7 +1369,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures gmap 2017-10-30 to adapt to many kinds of systems.
+\`configure' configures gmap 2017-11-15 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1440,7 +1440,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of gmap 2017-10-30:";;
+     short | recursive ) echo "Configuration of gmap 2017-11-15:";;
    esac
   cat <<\_ACEOF
 
@@ -1577,7 +1577,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-gmap configure 2017-10-30
+gmap configure 2017-11-15
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2183,7 +2183,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by gmap $as_me 2017-10-30, which was
+It was created by gmap $as_me 2017-11-15, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2533,8 +2533,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
 $as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-10-30" >&5
-$as_echo "2017-10-30" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-11-15" >&5
+$as_echo "2017-11-15" >&6; }
 
 
 ### Read defaults
@@ -4401,7 +4401,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='gmap'
- VERSION='2017-10-30'
+ VERSION='2017-11-15'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -19978,7 +19978,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by gmap $as_me 2017-10-30, which was
+This file was extended by gmap $as_me 2017-11-15, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -20044,7 +20044,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-gmap config.status 2017-10-30
+gmap config.status 2017-11-15
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/src/dynprog_end.c b/src/dynprog_end.c
index bf1740f..ed034e4 100644
--- a/src/dynprog_end.c
+++ b/src/dynprog_end.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_end.c 205259 2017-04-12 23:55:04Z twu $";
+static char rcsid[] = "$Id: dynprog_end.c 211225 2017-11-16 03:47:12Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1299,7 +1299,7 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 
   debug6(
 	printf("%c:  ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A');
-	printf("Aligning 5' end gap with endalign %d\n",endalign);
+	printf("Aligning 5' end gap with endalign = %d, roffset %d, goffset %d\n",endalign,roffset,goffset);
 	);
 
   mismatchtype = ENDQ;
@@ -1327,7 +1327,13 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
     debug6(printf("rlength %d is too long.  Chopping to %d\n",rlength,dynprog->max_rlength));
     rlength = dynprog->max_rlength;
   }
-  if (glength <= 0) {
+  if (rev_goffset < 0) {
+    /* No room to extend more 5' */
+    debug6(printf("rev_goffset %d < 0, so returning NULL\n",rev_goffset));
+    *nmatches = *nmismatches = *nopens = *nindels = 0;
+    *finalscore = 0;
+    return (List_T) NULL;
+  } else if (glength <= 0) {
     /* Needed to avoid abort by Matrix16_alloc */
     debug6(printf("glength %d <= 0, so returning NULL\n",glength));
     *nmatches = *nmismatches = *nopens = *nindels = 0;
@@ -1342,6 +1348,10 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 
   debug6(printf("At query offset %d-%d, %.*s\n",rev_roffset-rlength+1,rev_roffset,rlength,&(rev_rsequence[-rlength+1])));
 
+  /* Situation is handled above */
+  /* assert(rev_goffset >= 0); */
+
+
 #ifdef EXTRACT_GENOMICSEG
   debug6(printf("At genomic offset %d-%d, %.*s\n",
 		rev_goffset-glength+1,rev_goffset,glength,&(rev_gsequence[-glength+1])));
@@ -1899,7 +1909,7 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 
   debug6(
 	printf("%c:  ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A');
-	printf("Aligning 3' end gap with endalign = %d\n",endalign);
+	printf("Aligning 3' end gap with endalign = %d, roffset %d, goffset %d\n",endalign,roffset,goffset);
 	);
 
   mismatchtype = ENDQ;
@@ -1942,7 +1952,8 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 #ifdef EXTRACT_GENOMICSEG
   debug6(printf("At genomic offset %d-%d, %.*s\n",goffset,goffset+glength-1,glength,gsequence));
 #endif
-
+  /* Need to assert here, because if rlength is 0, then goffset doesn't matter */
+  assert(goffset >= 0);
 
   gsequence = (char *) MALLOCA((glength+1) * sizeof(char));
   gsequence_alt = (char *) MALLOCA((glength+1) * sizeof(char));
diff --git a/src/outbuffer.c b/src/outbuffer.c
index ddc2d14..2839fc4 100644
--- a/src/outbuffer.c
+++ b/src/outbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: outbuffer.c 210517 2017-10-12 22:41:05Z twu $";
+static char rcsid[] = "$Id: outbuffer.c 211224 2017-11-16 03:46:32Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -147,6 +147,12 @@ print_file_headers (
 		    FILE *output
 #endif
 		    ) {
+
+  if (output == NULL) {
+    /* Possible since we are no longer creating a file for OUTPUT_NONE */
+    return;
+  }
+
 #ifdef GSNAP
   if (output_sam_p == true && sam_headers_p == true) {
     SAM_header_print_HD(output,nworkers,orderedp);
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 9768290..adaef9c 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 209609 2017-09-01 21:57:26Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 211226 2017-11-16 03:49:07Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -21713,8 +21713,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
       debug(printf("After pairing one mismatch, found %d concordant, %d samechr, %d terminals, found_score %d\n",
 		   nconcordant,nsamechr,List_length(*terminals),*found_score));
       if (*abort_pairing_p == true) {
-	*hits5 = subs5;
-	*hits3 = subs3;
+	*hits5 = List_append(greedy5,subs5); /* Was subs5 */
+	*hits3 = List_append(greedy3,subs3); /* Was subs3 */
 	hitpairs = Stage3pair_remove_circular_alias(hitpairs);
 #if 0
 	hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
diff --git a/src/stage3.c b/src/stage3.c
index 3aac474..64010f7 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 210883 2017-10-30 21:42:56Z twu $";
+static char rcsid[] = "$Id: stage3.c 211229 2017-11-16 03:58:47Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -189,13 +189,6 @@ static const Except_T coordinate_error = {"Coordinate error"};
 #define debug1(x)
 #endif
 
-/* Chimeras */
-#ifdef DEBUG2
-#define debug2(x) x
-#else
-#define debug2(x)
-#endif
-
 /* path_trim */
 #ifdef DEBUG3
 #define debug3(x) x
@@ -512,9 +505,9 @@ Stage3_straintype (T this) {
 
 int
 Stage3_goodness (T this) {
-  debug2(printf("Overall goodness:\n"));
-  debug2(printf("  %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => goodness %d\n",
-		this->matches,this->mismatches,this->qopens,this->qindels,this->topens,this->tindels,this->goodness));
+  debug(printf("Overall goodness:\n"));
+  debug(printf("  %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => goodness %d\n",
+	       this->matches,this->mismatches,this->qopens,this->qindels,this->topens,this->tindels,this->goodness));
 
   return this->goodness;
 }
@@ -738,23 +731,23 @@ Stage3_chimeric_goodness (int *matches1, int *matches2, T part1, T part2, int br
     ncanonical2, nsemicanonical2, nnoncanonical2;
 
   querystart = Pair_querypos(&(part1->pairarray[0]));
-  debug2(printf("Chimeric goodness requested for part %d..%d\n",querystart+1,breakpoint));
+  debug10(printf("Chimeric goodness requested for part %d..%d\n",querystart+1,breakpoint));
   Pair_fracidentity_bounded(&(*matches1),&unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
 			    &ncanonical1,&nsemicanonical1,&nnoncanonical1,
 			    part1->pairarray,part1->npairs,part1->cdna_direction,
 			    querystart,breakpoint);
   goodness1 = (*matches1) + MISMATCH*mismatches1 + QOPEN*qopens1 + QINDEL*qindels1 + TOPEN*topens1 + TINDEL*tindels1;
-  debug2(printf("  %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n",
+  debug10(printf("  %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n",
 		*matches1,mismatches1,qopens1,qindels1,topens1,tindels1,goodness1));
 
   queryend = Pair_querypos(&(part2->pairarray[part2->npairs-1]));
-  debug2(printf("Chimeric goodness requested for part %d..%d\n",breakpoint+1,queryend+1));
+  debug10(printf("Chimeric goodness requested for part %d..%d\n",breakpoint+1,queryend+1));
   Pair_fracidentity_bounded(&(*matches2),&unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
 			    &ncanonical2,&nsemicanonical2,&nnoncanonical2,
 			    part2->pairarray,part2->npairs,part2->cdna_direction,
 			    breakpoint,queryend);
   goodness2 = (*matches2) + MISMATCH*mismatches2 + QOPEN*qopens2 + QINDEL*qindels2 + TOPEN*topens2 + TINDEL*tindels2;
-  debug2(printf("  %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n",
+  debug10(printf("  %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels => %d\n",
 		*matches2,mismatches2,qopens2,qindels2,topens2,tindels2,goodness2));
 
   return goodness1 + goodness2;
@@ -3743,7 +3736,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length,
   }
 
   exon = (List_T) NULL;
-  while (pairs != NULL && pair->gapp == false && pair->comp != INDEL_COMP) {
+  while (pairs != NULL && pair->gapp == false && pair->comp != INDEL_COMP && pair->comp != SHORTGAP_COMP) {
     pairptr = pairs;
     pairs = Pairpool_pop(pairs,&pair);
 #ifdef WASTE
@@ -3753,7 +3746,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length,
 #endif
   }
 
-  while (pairs != NULL && pair->gapp == false && ((Pair_T) pairs->first)->comp == INDEL_COMP) {
+  while (pairs != NULL && pair->gapp == false && (((Pair_T) pairs->first)->comp == INDEL_COMP || ((Pair_T) pairs->first)->comp == SHORTGAP_COMP)) {
     pairptr = pairs;
     pairs = Pairpool_pop(pairs,&pair);
 #ifdef WASTE
@@ -3773,7 +3766,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length,
   } else {
     p = exon;
     nindels = 1;
-    while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) {
+    while (p != NULL && (((Pair_T) p->first)->comp == INDEL_COMP || ((Pair_T) p->first)->comp == SHORTGAP_COMP)) {
       p = List_next(p);
       nindels++;
     }
@@ -3850,6 +3843,7 @@ trim_end5_indels (List_T pairs, int ambig_end_length,
 						       chroffset,chrhigh,watsonp,jump_late_p,pairpool,
 						       extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true);
       debug(printf("CONTINUOUS AT 5 (trim_end5_indels)?\n"));
+      debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n"));
       debug(Pair_dump_list(continuous_gappairs_medialgap,true));
       debug3(printf("continuous finalscore %d\n",finalscore));
 
@@ -3893,7 +3887,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs,
   List_T path, exon, pairptr, p;
   Pair_T pair, splice = NULL, gappair;
   int max_nmatches = 0, max_nmismatches;
-  int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
+  int nmatches = 0, nmismatches /* = -1 because of the gap */;
   int max_score, score;
   /* bool nearindelp = false; */
   double medial_prob;
@@ -3930,7 +3924,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs,
   }
 
   exon = (List_T) NULL;
-  while (pairs != NULL && !pair->gapp /*&& pair->comp != INDEL_COMP*/) {
+  while (pairs != NULL && !pair->gapp /*&& pair->comp != INDEL_COMP && pair->comp != SHORTGAP_COMP */) {
     pairptr = pairs;
     pairs = Pairpool_pop(pairs,&pair);
 #ifdef WASTE
@@ -4085,6 +4079,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs,
 						       chroffset,chrhigh,watsonp,jump_late_p,pairpool,
 						       extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true);
       debug(printf("CONTINUOUS AT 5 (trim_end5_exons)?\n"));
+      debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n"));
       debug(Pair_dump_list(continuous_gappairs_medialgap,true));
       debug3(printf("continuous finalscore %d\n",finalscore));
 
@@ -4151,7 +4146,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
   List_T pairs, exon, pairptr, p;
   Pair_T pair, medial;
   int max_nmatches = 0, max_nmismatches;
-  int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
+  int nmatches = 0, nmismatches /* = -1 because of the gap */;
   int max_score, score;
   bool nearindelp = false;
   int nindels;
@@ -4177,7 +4172,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
   }
 
   exon = (List_T) NULL;
-  while (path != NULL && pair->gapp == false && pair->comp != INDEL_COMP) {
+  while (path != NULL && pair->gapp == false && pair->comp != INDEL_COMP && pair->comp != SHORTGAP_COMP) {
     pairptr = path;
     path = Pairpool_pop(path,&pair);
 #ifdef WASTE
@@ -4187,7 +4182,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
 #endif
   }
 
-  while (path != NULL && pair->gapp == false && ((Pair_T) path->first)->comp == INDEL_COMP) {
+  while (path != NULL && pair->gapp == false && (((Pair_T) path->first)->comp == INDEL_COMP || ((Pair_T) path->first)->comp == SHORTGAP_COMP)) {
     pairptr = path;
     path = Pairpool_pop(path,&pair);
 #ifdef WASTE
@@ -4207,7 +4202,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
   } else {
     p = exon;
     nindels = 1;
-    while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) {
+    while (p != NULL && (((Pair_T) p->first)->comp == INDEL_COMP || ((Pair_T) p->first)->comp == SHORTGAP_COMP)) {
       p = List_next(p);
       nindels++;
     }
@@ -4285,6 +4280,7 @@ trim_end3_indels (List_T path, int ambig_end_length,
 						       chroffset,chrhigh,watsonp,jump_late_p,pairpool,
 						       extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true);
       debug(printf("CONTINUOUS AT 3 (trim_end3_indels)?\n"));
+      debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n"));
       debug(Pair_dump_list(continuous_gappairs_medialgap,true));
       debug3(printf("continuous finalscore %d\n",finalscore));
 
@@ -4519,6 +4515,7 @@ trim_end3_exons (bool *indelp, bool *trim3p, int ambig_end_length, List_T path,
 						       chroffset,chrhigh,watsonp,jump_late_p,pairpool,
 						       extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true);
       debug(printf("CONTINUOUS AT 3 (trim_end3_exons)?\n"));
+      debug(printf("CONTINUOUS_GAPPAIRS_MEDIALGAP:\n"));
       debug(Pair_dump_list(continuous_gappairs_medialgap,true));
       debug3(printf("continuous finalscore %d\n",finalscore));
 
@@ -9688,8 +9685,9 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
 		Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
 		char *queryseq_ptr, char *queryuc_ptr,
 		int cdna_direction, bool watsonp, bool jump_late_p, Pairpool_T pairpool,
-		Dynprog_T dynprog, int maxpeelback, double defect_rate, Endalign_T endalign) {
-  List_T continuous_gappairs_distalgap = NULL, peeled_pairs;
+		Dynprog_T dynprog, int maxpeelback, double defect_rate, Endalign_T endalign,
+		bool forcep) {
+  List_T continuous_gappairs_distalgap = NULL, peeled_pairs = NULL;
   int queryjump, genomejump;
   int querydp5, querydp3_distalgap;
   Chrpos_T genomedp3_distalgap;
@@ -9758,7 +9756,8 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
     continuous_gappairs_distalgap = Dynprog_end5_gap(&(*dynprogindex_minor),&(*finalscore),
 						     &nmatches,&nmismatches,&nopens,&nindels,dynprog,
 						     &(queryseq_ptr[querydp3_distalgap]),&(queryuc_ptr[querydp3_distalgap]),
-						     queryjump,genomejump,querydp3_distalgap,genomedp3_distalgap,
+						     /*rlength*/queryjump,/*glength*/genomejump,
+						     /*rev_roffset*/querydp3_distalgap,/*rev_goffset*/genomedp3_distalgap,
 						     chroffset,chrhigh,watsonp,jump_late_p,pairpool,
 						     extraband_end,defect_rate,endalign,/*require_pos_score_p*/false);
     *ambig_end_length = 0;
@@ -9768,7 +9767,7 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
 
   debug(printf("  finalscore: %d\n",*finalscore));
   if (continuous_gappairs_distalgap == NULL) {
-    return (List_T) NULL;
+    return peeled_pairs;
   } else {
     firstpair = List_head(continuous_gappairs_distalgap);
     if (0 && firstpair->querypos != querydp3_distalgap) {
@@ -9776,11 +9775,14 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
       /* Must have an indel between the gappairs and the rest of the read */
       debug(printf("Detected indel between gappairs %d and the rest of the read %d\n",
 		   firstpair->querypos,querydp3_distalgap));
-      return (List_T) NULL;
+      return peeled_pairs;
 
+    } else if (forcep == true) {
+      /* For example, needed for extending middles of chimeras */
+      return continuous_gappairs_distalgap;
     } else if (*finalscore <= 0) {
       *knownsplicep = false;
-      return (List_T) NULL;
+      return peeled_pairs;
     } else {
       return continuous_gappairs_distalgap;
     }
@@ -9910,8 +9912,8 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
 		char *queryseq_ptr, char *queryuc_ptr,
 		int cdna_direction, bool watsonp, bool jump_late_p,
 		Pairpool_T pairpool, Dynprog_T dynprog, int maxpeelback,
-		double defect_rate, Endalign_T endalign) {
-  List_T continuous_gappairs_distalgap = NULL, peeled_path;
+		double defect_rate, Endalign_T endalign, bool forcep) {
+  List_T continuous_gappairs_distalgap = NULL, peeled_path = NULL;
   int queryjump, genomejump;
   int querydp5_distalgap, querydp3;
   Chrpos_T genomedp5_distalgap;
@@ -9986,7 +9988,7 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
 
   debug(printf("  finalscore: %d\n",*finalscore));
   if (continuous_gappairs_distalgap == NULL) {
-    return (List_T) NULL;
+    return peeled_path;
   } else {
     continuous_gappairs_distalgap = List_reverse(continuous_gappairs_distalgap);
     firstpair = List_head(continuous_gappairs_distalgap);
@@ -9995,11 +9997,14 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
       /* Must have an indel between the gappairs and the rest of the read */
       debug(printf("Detected indel between gappairs %d and the rest of the read %d\n",
 		   firstpair->querypos,querydp5_distalgap));
-      return (List_T) NULL;
+      return peeled_path;
       
+    } else if (forcep == true) {
+      /* For example, needed for extending middle of chimeras */
+      return continuous_gappairs_distalgap;
     } else if (*finalscore <= 0) {
       *knownsplicep = false;
-      return (List_T) NULL;
+      return peeled_path;
     } else {
       return continuous_gappairs_distalgap;
     }
@@ -10852,26 +10857,30 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
     chrend = (chrhigh - chroffset) - genomedp5;
   }
 
-  assert(chrend >= chrstart);
-  
-  debug14(printf("Starting stage2 with chrstart %u, chrend %u, watsonp %d\n",
-		 chrstart,chrend,watsonp));
-  gappairs = Stage2_compute_one(
+  /* chrend can be equal to chrstart - 1 if peelback fails on both ends */
+  assert(chrend + 1 >= chrstart);
+  if (chrend < chrstart) {
+    gappairs = (List_T) NULL;
+  } else {
+    debug14(printf("Starting stage2 with chrstart %u, chrend %u, watsonp %d\n",
+		   chrstart,chrend,watsonp));
+    gappairs = Stage2_compute_one(
 #ifdef PMAP
-				&(queryaaseq_ptr[querydp5]),&(queryaaseq_ptr[querydp5]),
-				/*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5*3,
+				  &(queryaaseq_ptr[querydp5]),&(queryaaseq_ptr[querydp5]),
+				  /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5*3,
 #else
-				&(queryseq_ptr[querydp5]),&(queryuc_ptr[querydp5]),
-				/*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5,
+				  &(queryseq_ptr[querydp5]),&(queryuc_ptr[querydp5]),
+				  /*querylength*/querydp3-querydp5+1,/*query_offset*/querydp5,
 #endif
-				chrstart,chrend,chroffset,chrhigh,/*plusp*/watsonp,genestrand,
+				  chrstart,chrend,chroffset,chrhigh,/*plusp*/watsonp,genestrand,
 
-				oligoindices_minor,pairpool,diagpool,cellpool,
-				/*localp should be false*/true,/*skip_repetitive_p*/false,
-				/*favor_right_p*/false,/*debug_graphic_p*/false);
-  
-  debug14(printf("Internal stage2 result:\n"));
-  debug14(Pair_dump_list(gappairs,true));
+				  oligoindices_minor,pairpool,diagpool,cellpool,
+				  /*localp should be false*/true,/*skip_repetitive_p*/false,
+				  /*favor_right_p*/false,/*debug_graphic_p*/false);
+    
+    debug14(printf("Internal stage2 result:\n"));
+    debug14(Pair_dump_list(gappairs,true));
+  }
 
   if (gappairs == NULL) {
     pairs = Pairpool_transfer(pairs,peeled_pairs);
@@ -11028,7 +11037,7 @@ build_path_end3 (bool *knownsplicep, int *ambig_end_length_3, Splicetype_T *ambi
 		 char *queryseq_ptr, char *queryuc_ptr,
 		 int cdna_direction, bool watsonp, bool jump_late_p, int maxpeelback,
 		 double defect_rate, Pairpool_T pairpool, Dynprog_T dynprogL,
-		 bool extendp, Endalign_T endalign) {
+		 bool extendp, Endalign_T endalign, bool forcep) {
   List_T gappairs;
   Pair_T leftpair;
   /* int genomejump */
@@ -11093,7 +11102,7 @@ build_path_end3 (bool *knownsplicep, int *ambig_end_length_3, Splicetype_T *ambi
 			      chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
 			      queryseq_ptr,queryuc_ptr,
 			      cdna_direction,watsonp,jump_late_p,pairpool,dynprogL,maxpeelback,
-			      defect_rate,endalign);
+			      defect_rate,endalign,forcep);
   } else {
     /* Looks like we aren't calling this anymore */
     abort();
@@ -11139,7 +11148,7 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb
 		  char *queryseq_ptr, char *queryuc_ptr,
 		  int cdna_direction, bool watsonp, bool jump_late_p, int maxpeelback,
 		  double defect_rate, Pairpool_T pairpool, Dynprog_T dynprogR,
-		  bool extendp, Endalign_T endalign) {
+		  bool extendp, Endalign_T endalign, bool forcep) {
   List_T gappairs;
   Pair_T rightpair;
   int queryjump, leftquerypos;
@@ -11200,7 +11209,7 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb
 			      chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
 			      queryseq_ptr,queryuc_ptr,
 			      cdna_direction,watsonp,jump_late_p,pairpool,dynprogR,
-			      maxpeelback,defect_rate,endalign);
+			      maxpeelback,defect_rate,endalign,forcep);
   } else {
     /* Looks like we aren't calling this anymore */
     abort();
@@ -13364,8 +13373,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
 			   queryseq_ptr,queryuc_ptr,
 			   cdna_direction,watsonp,jump_late_p,
 			   maxpeelback,defect_rate,pairpool,dynprogR,
-			   /*extendp*/true,/*endalign*/QUERYEND_GAP);
-
+			   /*extendp*/true,/*endalign*/QUERYEND_GAP,/*forcep*/false);
 
 
   /* Necessary to insert gaps and assign gap types (fills in cDNA
@@ -13412,7 +13420,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
 				 queryseq_ptr,queryuc_ptr,
 				 cdna_direction,watsonp,jump_late_p,
 				 maxpeelback,defect_rate,pairpool,dynprogR,/*extendp*/true,
-				 /*endalign*/BEST_LOCAL);
+				 /*endalign*/BEST_LOCAL,/*forcep*/false);
 	debug3(printf("AFTER 5' REBUILD\n"));
 	debug3(Pair_dump_list(pairs,true));
       }
@@ -13432,7 +13440,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
 			     queryseq_ptr,queryuc_ptr,
 			     cdna_direction,watsonp,jump_late_p,
 			     maxpeelback,defect_rate,pairpool,dynprogR,/*extendp*/true,
-			     /*endalign*/QUERYEND_INDELS);
+			     /*endalign*/QUERYEND_INDELS,/*forcep*/false);
 #endif
   }
 
@@ -13446,7 +13454,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
 			   queryseq_ptr,queryuc_ptr,
 			   cdna_direction,watsonp,jump_late_p,
 			   maxpeelback,defect_rate,pairpool,dynprogR,
-			   /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+			   /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
 
   debug(Pair_dump_list(pairs,true));
   debug(printf("End of path_compute_end5\n"));
@@ -13514,7 +13522,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
 			 queryseq_ptr,queryuc_ptr,
 			 cdna_direction,watsonp,jump_late_p,
 			 maxpeelback,defect_rate,pairpool,dynprogL,
-			 /*extendp*/true,/*endalign*/QUERYEND_GAP);
+			 /*extendp*/true,/*endalign*/QUERYEND_GAP,/*forcep*/false);
 
   /* Necessary to insert gaps and assign gap types (fills in cDNA
      insertions, so they don't get trimmed), in case an insertion was
@@ -13561,7 +13569,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
 			       queryseq_ptr,queryuc_ptr,
 			       cdna_direction,watsonp,jump_late_p,
 			       maxpeelback,defect_rate,pairpool,dynprogL,/*extendp*/true,
-			       /*endalign*/BEST_LOCAL);
+			       /*endalign*/BEST_LOCAL,/*forcep*/false);
 	debug3(printf("AFTER 3' REBUILD\n"));
 	debug3(Pair_dump_list(path,true));
       }
@@ -13580,7 +13588,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
 			   queryseq_ptr,queryuc_ptr,
 			   cdna_direction,watsonp,jump_late_p,
 			   maxpeelback,defect_rate,pairpool,dynprogL,/*extendp*/true,
-			   /*endalign*/QUERYEND_NOGAPS);
+			   /*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
 #endif
   }
 
@@ -13594,7 +13602,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
 			 queryseq_ptr,queryuc_ptr,
 			 cdna_direction,watsonp,jump_late_p,
 			 maxpeelback,defect_rate,pairpool,dynprogL,
-			 /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+			 /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
 
   debug(Pair_dump_list(path,true));
   debug(printf("End of path_compute_end3\n"));
@@ -15024,7 +15032,7 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
 				 queryseq_ptr,queryuc_ptr,
 				 *cdna_direction,watsonp,jump_late_p,
 				 maxpeelback,defect_rate,pairpool,dynprogR,
-				 /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+				 /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
 	pairs = trim_end5_exons(&indelp,&trim5p,*ambig_end_length_5,pairs,dynprogR,chroffset,chrhigh,
 				queryseq_ptr,queryuc_ptr,querylength,*cdna_direction,watsonp,
 				jump_late_p,pairpool,defect_rate);
@@ -15049,7 +15057,7 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
 			       queryseq_ptr,queryuc_ptr,
 			       *cdna_direction,watsonp,jump_late_p,
 			       maxpeelback,defect_rate,pairpool,dynprogL,
-			       /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+			       /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/false);
 	path = trim_end3_exons(&indelp,&trim3p,*ambig_end_length_3,path,dynprogL,chroffset,chrhigh,
 			       queryseq_ptr,queryuc_ptr,querylength,
 			       *cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
@@ -15954,13 +15962,31 @@ Stage3_merge_chimera (T this_left, T this_right,
   Chrpos_T genomedp5, genomedp3;
   bool protectedp;
 
+  debug10(printf("Entered stage3_merge_chimera with minpos1 %d, maxpos1 %d, minpos2 %d, maxpos2 %d\n",
+		minpos1,maxpos1,minpos2,maxpos2));
 
   orig_left_pairs = Pairpool_copy(this_left->pairs,pairpool);
   orig_right_pairs = Pairpool_copy(this_right->pairs,pairpool);
 
+#ifdef DEBUG10
+  printf("ORIG LEFT PAIRS:\n");
+  Pair_dump_list(orig_left_pairs,true);
+  printf("ORIG RIGHT PAIRS:\n");
+  Pair_dump_list(orig_right_pairs,true);
+#endif
+
+
   this_left->pairs = Pair_clip_bounded_list(this_left->pairs,minpos1,maxpos1);
   this_right->pairs = Pair_clip_bounded_list(this_right->pairs,minpos2,maxpos2);
 
+#ifdef DEBUG10
+  printf("CLIPPED LEFT PAIRS:\n");
+  Pair_dump_list(this_left->pairs,true);
+  printf("CLIPPED RIGHT PAIRS:\n");
+  Pair_dump_list(this_right->pairs,true);
+#endif
+
+
   if (this_left->pairs == NULL && this_right->pairs == NULL) {
     this_left->pairs = orig_left_pairs;
     this_right->pairs = orig_right_pairs;
@@ -15983,15 +16009,17 @@ Stage3_merge_chimera (T this_left, T this_right,
   } else {
     path = List_reverse(this_left->pairs);
 
-    /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then clip*/
+    /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then re-clip*/
     endpair = (Pair_T) path->first;
     querydp5 = endpair->querypos + 1;
     genomedp5 = endpair->genomepos + 1;
     protectedp = false;
     path = peel_leftward(&n_peeled_indels,&protectedp,&peeled_path,path,&querydp5,&genomedp5,
-			 maxpeelback,/*stop_at_indels_p*/false);
+			 maxpeelback,/*stop_at_indels_p*/true);
     path = clean_path_end3_gap_indels(path);
 
+    /* Have to use forcep == true, because we cannot put back the
+       pairs removed from peel_leftward and clean_path_end3_gap_indels */
     path = build_path_end3(&knownsplicep,&ambig_end_length_3,&ambig_splicetype,&ambig_prob_3,
 			   &chop_exon_p,&dynprogindex_minor,path,
 			   this_left->chroffset,this_left->chrhigh,/*querylength*/maxpos1+1,
@@ -16000,12 +16028,14 @@ Stage3_merge_chimera (T this_left, T this_right,
 			   this_left->cdna_direction,this_left->watsonp,
 			   /*jump_late_p*/this_left->watsonp ? false : true,
 			   maxpeelback,/*defect_rate*/0.0,pairpool,dynprogL,
-			   /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+			   /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/true);
 
     this_left->pairs = List_reverse(path);
     this_left->pairs = Pair_clip_bounded_list(this_left->pairs,minpos1,maxpos1);
+    debug10(printf("CLEANED LEFT PAIRS:\n"));
+    debug10(Pair_dump_list(this_left->pairs,true));
 
-    /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then clip*/
+    /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then re-clip*/
     pairs = this_right->pairs;
 
     endpair = (Pair_T) pairs->first;
@@ -16013,9 +16043,11 @@ Stage3_merge_chimera (T this_left, T this_right,
     genomedp3 = endpair->genomepos - 1;
     protectedp = false;
     pairs = peel_rightward(&n_peeled_indels,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
-			   maxpeelback,/*stop_at_indels_p*/false);
+			   maxpeelback,/*stop_at_indels_p*/true);
     pairs = clean_pairs_end5_gap_indels(pairs);
 
+    /* Have to use forcep == true, because we cannot put back the
+       pairs removed from peel_rightward and clean_pairs_end5_gap_indels */
     pairs = build_pairs_end5(&knownsplicep,&ambig_end_length_5,&ambig_splicetype,&ambig_prob_5,
 			     &chop_exon_p,&dynprogindex_minor,pairs,
 			     this_right->chroffset,this_right->chrhigh,
@@ -16024,8 +16056,10 @@ Stage3_merge_chimera (T this_left, T this_right,
 			     this_right->cdna_direction,this_right->watsonp,
 			     /*jump_late_p*/this_right->watsonp ? false : true,
 			     maxpeelback,/*defect_rate*/0.0,pairpool,dynprogR,
-			     /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+			     /*extendp*/true,/*endalign*/QUERYEND_NOGAPS,/*forcep*/true);
     this_right->pairs = Pair_clip_bounded_list(pairs,minpos2,maxpos2);
+    debug10(printf("CLEANED RIGHT PAIRS:\n"));
+    debug10(Pair_dump_list(this_right->pairs,true));
 
     if (this_left->pairs == NULL || this_right->pairs == NULL) {
       this_left->pairs = orig_left_pairs;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git



More information about the debian-med-commit mailing list