[med-svn] [Git][med-team/gmap][upstream] New upstream version 2023-10-10.v2+ds

Étienne Mollier (@emollier) gitlab at salsa.debian.org
Wed Nov 29 22:16:28 GMT 2023



Étienne Mollier pushed to branch upstream at Debian Med / gmap


Commits:
776a1a5b by Étienne Mollier at 2023-11-29T22:56:02+01:00
New upstream version 2023-10-10.v2+ds
- - - - -


7 changed files:

- src/path-fusion.c
- src/path-print-sam.c
- src/path-solve.c
- src/shortread.c
- src/shortread.h
- src/spliceends.c
- src/stage3.c


Changes:

=====================================
src/path-fusion.c
=====================================
@@ -3256,13 +3256,19 @@ Path_fusion_inner_qend (int *found_score, T fusion5, T anchor3,
     debug(printf("Looking for qend hit in %d..%d at %u..%u\n",
 		 pos5,querylength,low_univdiagonal,high_univdiagonal));
 
-    new_univdiagonal = Spliceends_qend_resolve(&local_nmismatches,pos5,querylength,
-					       low_univdiagonal,high_univdiagonal,
-					       query_compress_main,queryptr_main,main_plusp,genestrand,
-					       novel_diagonals_alloc,localdb_alloc,
-					       streamspace_max_alloc,streamspace_alloc,
-					       streamptr_alloc,streamsize_alloc,mergeinfo,
-					       nmismatches_allowed);
+    if (low_univdiagonal > high_univdiagonal) {
+      debug(printf("Skipping because %u > %u\n",low_univdiagonal,high_univdiagonal));
+      new_univdiagonal = 0;
+    } else {
+      new_univdiagonal = Spliceends_qend_resolve(&local_nmismatches,pos5,querylength,
+						 low_univdiagonal,high_univdiagonal,
+						 query_compress_main,queryptr_main,main_plusp,genestrand,
+						 novel_diagonals_alloc,localdb_alloc,
+						 streamspace_max_alloc,streamspace_alloc,
+						 streamptr_alloc,streamsize_alloc,mergeinfo,
+						 nmismatches_allowed);
+    }
+
   } else {
     goal_univdiagonal = add_bounded(Path_genomichigh(anchor3) - anchor3->querylength,
 				    /*expected_pairlength*/30000,anchor3->chrhigh);
@@ -3298,13 +3304,17 @@ Path_fusion_inner_qend (int *found_score, T fusion5, T anchor3,
     debug(printf("Looking for qstart hit in 0..%d at %u..%u\n",
 		 pos3,low_univdiagonal,high_univdiagonal));
 
-    new_univdiagonal = Spliceends_qstart_resolve(&local_nmismatches,pos3,querylength,
-						 low_univdiagonal,high_univdiagonal,
-						 query_compress_main,queryptr_main,main_plusp,genestrand,
-						 novel_diagonals_alloc,localdb_alloc,
-						 streamspace_max_alloc,streamspace_alloc,
-						 streamptr_alloc,streamsize_alloc,mergeinfo,
-						 nmismatches_allowed);
+    if (low_univdiagonal > high_univdiagonal) {
+      new_univdiagonal = 0;
+    } else {
+      new_univdiagonal = Spliceends_qstart_resolve(&local_nmismatches,pos3,querylength,
+						   low_univdiagonal,high_univdiagonal,
+						   query_compress_main,queryptr_main,main_plusp,genestrand,
+						   novel_diagonals_alloc,localdb_alloc,
+						   streamspace_max_alloc,streamspace_alloc,
+						   streamptr_alloc,streamsize_alloc,mergeinfo,
+						   nmismatches_allowed);
+    }
   }
 
   if (new_univdiagonal == 0) {
@@ -3465,13 +3475,18 @@ Path_fusion_inner_qstart (int *found_score, T fusion3, T anchor5,
     debug(printf("Looking for qstart hit in %d..%d at %u..%u\n",
 		 0,pos3,low_univdiagonal,high_univdiagonal));
 
-    new_univdiagonal = Spliceends_qstart_resolve(&local_nmismatches,pos3,querylength,
-						 low_univdiagonal,high_univdiagonal,
-						 query_compress_main,queryptr_main,main_plusp,genestrand,
-						 novel_diagonals_alloc,localdb_alloc,
-						 streamspace_max_alloc,streamspace_alloc,
-						 streamptr_alloc,streamsize_alloc,mergeinfo,
-						 nmismatches_allowed);
+    if (low_univdiagonal > high_univdiagonal) {
+      new_univdiagonal = 0;
+    } else {
+      new_univdiagonal = Spliceends_qstart_resolve(&local_nmismatches,pos3,querylength,
+						   low_univdiagonal,high_univdiagonal,
+						   query_compress_main,queryptr_main,main_plusp,genestrand,
+						   novel_diagonals_alloc,localdb_alloc,
+						   streamspace_max_alloc,streamspace_alloc,
+						   streamptr_alloc,streamsize_alloc,mergeinfo,
+						   nmismatches_allowed);
+    }
+
   } else {
     goal_univdiagonal = subtract_bounded(Path_genomiclow(anchor5) + anchor5->querylength + fusion3->querylength,
 					 /*expected_pairlength*/30000,anchor5->chroffset + anchor5->querylength);
@@ -3508,13 +3523,17 @@ Path_fusion_inner_qstart (int *found_score, T fusion3, T anchor5,
     debug(printf("Looking for qend hit in %d..%d at %u..%u\n",
 		 pos5,querylength,low_univdiagonal,high_univdiagonal));
 
-    new_univdiagonal = Spliceends_qend_resolve(&local_nmismatches,pos5,querylength,
-					       low_univdiagonal,high_univdiagonal,
-					       query_compress_main,queryptr_main,main_plusp,genestrand,
-					       novel_diagonals_alloc,localdb_alloc,
-					       streamspace_max_alloc,streamspace_alloc,
-					       streamptr_alloc,streamsize_alloc,mergeinfo,
-					       nmismatches_allowed);
+    if (low_univdiagonal > high_univdiagonal) {
+      new_univdiagonal = 0;
+    } else {
+      new_univdiagonal = Spliceends_qend_resolve(&local_nmismatches,pos5,querylength,
+						 low_univdiagonal,high_univdiagonal,
+						 query_compress_main,queryptr_main,main_plusp,genestrand,
+						 novel_diagonals_alloc,localdb_alloc,
+						 streamspace_max_alloc,streamspace_alloc,
+						 streamptr_alloc,streamsize_alloc,mergeinfo,
+						 nmismatches_allowed);
+    }
   }
 
   if (new_univdiagonal == 0) {


=====================================
src/path-print-sam.c
=====================================
@@ -894,9 +894,9 @@ print_substrings (int *hardclip_low, int *hardclip_high,
     if (Shortread_quality_string(queryseq) != NULL) {
       FPRINTF(fp,"\tXI:Z:");
       if (path->plusp == true) {
-	Shortread_print_chopped_end_quality(fp,queryseq,*hardclip_low,*hardclip_high);
+	Shortread_print_chopped_end_quality(fp,queryseq,*hardclip_low,*hardclip_high,quality_shift);
       } else {
-	Shortread_print_chopped_end_quality_reverse(fp,queryseq,*hardclip_low,*hardclip_high);
+	Shortread_print_chopped_end_quality_reverse(fp,queryseq,*hardclip_low,*hardclip_high,quality_shift);
       }
     }
   }
@@ -1323,7 +1323,7 @@ print_fusion (Filestring_T fp, char *abbrev, T path,
 	      int pairedlength, int pair_relationship, int hardclip_low, int hardclip_high,
 
 	      T mate, Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
-	      char *sam_read_group_id, bool invertp, bool invert_mate_p) {
+	      int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
   unsigned int flag = 0U;
 
   int hardclip_low_ignore, hardclip_high_ignore;
@@ -1483,13 +1483,13 @@ print_fusion (Filestring_T fp, char *abbrev, T path,
     } else {
       FPRINTF(fp,"\t");
       if (path->plusp == true && path->fusion_plusp == true) {
-	Shortread_print_chopped_end_quality(fp,queryseq,hardclip_low,hardclip_high);
+	Shortread_print_chopped_end_quality(fp,queryseq,hardclip_low,hardclip_high,quality_shift);
       } else if (path->plusp == true && path->fusion_plusp == false) {
-	Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_high,hardclip_low);
+	Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_high,hardclip_low,quality_shift);
       } else if (path->plusp == false && path->fusion_plusp == true) {
-	Shortread_print_chopped_end_quality(fp,queryseq,hardclip_high,hardclip_low);
+	Shortread_print_chopped_end_quality(fp,queryseq,hardclip_high,hardclip_low,quality_shift);
       } else {
-	Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_low,hardclip_high);
+	Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_low,hardclip_high,quality_shift);
       }
     }
   }
@@ -1908,7 +1908,7 @@ Path_print_sam (Filestring_T fp, Filestring_T *fp_failedinput, char *abbrev,
 		 queryseq,mate_queryseq,single_cell_infoseq,pairedlength,pair_relationship,
 		 hardclip_low,hardclip_high,mate,
 		 resulttype,first_read_p,artificial_mate_p,
-		 npaths_mate,sam_read_group_id,invertp,invert_mate_p);
+		 npaths_mate,quality_shift,sam_read_group_id,invertp,invert_mate_p);
   }
 
   return;


=====================================
src/path-solve.c
=====================================
@@ -246,7 +246,7 @@ attach_indel_qstart_simple (int adj, T path, int indel_pos,
 /* Returns a newpath without modifying or deleting path */
 static T
 attach_indel_qstart (T path, Univcoord_T low_univdiagonal, int low_qstart,
-		     Univcoord_T chrhigh, int querylength, Indelinfo_T indelinfo,
+		     Univcoord_T chroffset, Univcoord_T chrhigh, int querylength, Indelinfo_T indelinfo,
 		     Compress_T query_compress, bool plusp, int genestrand,
 		     int max_insertionlen, int max_deletionlen,
 		     Intlistpool_T intlistpool, Univcoordlistpool_T univcoordlistpool,
@@ -286,6 +286,10 @@ attach_indel_qstart (T path, Univcoord_T low_univdiagonal, int low_qstart,
     /* Impossible */
     debug13(printf("Impossible\n"));
 
+  } else if (low_univdiagonal + low_qstart < chroffset + querylength) {
+    debug13(printf("Extends beyond start of chromosome: low_univdiagonal %u - querylength %d + low_qstart %d vs chroffset %u\n",
+		   low_univdiagonal,querylength,low_qstart,chroffset));
+
   } else if (low_univdiagonal > univdiagonal) {
     /* (A) Insertion */
     nindels = low_univdiagonal - univdiagonal;
@@ -670,6 +674,10 @@ attach_unknown_qstart (T path, Univcoord_T low_univdiagonal, int low_qstart,
   if (low_qstart >= qend) {
     debug13(printf("Does not add to start of path: low_qstart %d >= qend %d\n",low_qstart,qend));
 
+  } else if (low_univdiagonal + low_qstart < chroffset + querylength) {
+    debug13(printf("Extends beyond start of chromosome: low_univdiagonal %u - querylength %d + low_qstart %u vs chroffset %u\n",
+		   low_univdiagonal,querylength,low_qstart,chroffset));
+
   } else if (low_univdiagonal == univdiagonal) {
     if (low_qstart >= Intlist_head(path->endpoints)) {
       debug13(printf("Mismatch fails, since new endpoint %d >= old endpoint %d\n",low_qstart,Intlist_head(path->endpoints)));
@@ -1433,6 +1441,10 @@ attach_indel_qend (T path, Univcoord_T high_univdiagonal, int high_qend,
     /* Impossible */
     debug13(printf("Impossible\n"));
 
+  } else if (high_univdiagonal + high_qend >= chrhigh + querylength) {
+    debug13(printf("Extends beyond end of chromosome: high_univdiagonal %u - querylength %d + high_qend %d vs chrhigh %u\n",
+		   high_univdiagonal,querylength,high_qend,chrhigh));
+
   } else if (high_univdiagonal < univdiagonal) {
     /* (A) Insertion */
     nindels = univdiagonal - high_univdiagonal;
@@ -1829,6 +1841,10 @@ attach_unknown_qend (T path, Univcoord_T high_univdiagonal, int high_qstart, int
   if (qstart >= high_qend) {
     debug13(printf("Does not add to end of path: qstart %d >= high_qend %d\n",qstart,high_qend));
     
+  } else if (high_univdiagonal + high_qend >= chrhigh + querylength) {
+    debug13(printf("Extends beyond end of chromosome: high_univdiagonal %u - querylength %d + high_qend %d vs chrhigh %u\n",
+		   high_univdiagonal,querylength,high_qend,chrhigh));
+
   } else if (high_univdiagonal == univdiagonal) {
     if (high_qend <= Intlist_head(path->endpoints)) {
       debug13(printf("Mismatch fails, since new endpoint %d <= old endpoint %d\n",high_qend,Intlist_head(path->endpoints)));
@@ -3290,7 +3306,7 @@ compute_qstart_local (List_T *unextended_paths, List_T *complete_paths,
 	
 	/* Subtract adj to get low diagonal, but add adj to get high diagonal */
 	if ((newpath = attach_indel_qstart(path,/*low_diagonal*/univdiagonal - adj,/*low_qstart*/trimpos,
-					   chrhigh,querylength,indelinfo,
+					   chroffset,chrhigh,querylength,indelinfo,
 					   query_compress,plusp,genestrand,
 					   max_insertionlen,max_deletionlen,
 					   intlistpool,univcoordlistpool,listpool,pathpool,vectorpool)) != NULL) {


=====================================
src/shortread.c
=====================================
@@ -4801,17 +4801,6 @@ Shortread_print_chopped_revcomp_sam (Filestring_T fp, T this, int hardclip_low,
 
 void
 Shortread_print_hardclipped_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
-#ifdef PRINT_INDIVIDUAL_CHARS
-  int i;
-#endif
-
-#ifdef PRINT_INDIVIDUAL_CHARS
-  FPRINTF(fp,"\t");
-  for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
-    FPRINTF(fp,"%c",this->contents[i]);
-  }
-#else
-  FPRINTF(fp,"\t");
 
   /* Hardclip on same side as chop includes choplength */
   if (this->left_chop != NULL && hardclip_low > 0) {
@@ -4821,14 +4810,14 @@ Shortread_print_hardclipped_sam (Filestring_T fp, T this, int hardclip_low, int
     hardclip_high -= this->right_choplength;
   }
 
+  FPRINTF(fp,"\t");
   if (this->left_chop != NULL && hardclip_low == 0) {
-    FPRINTF(fp,"%.*s",this->left_choplength,this->left_chop);
+    FPRINTF(fp,"%s",this->left_chop);
   }
   FPRINTF(fp,"%.*s",this->fulllength - hardclip_high - hardclip_low,&(this->contents[hardclip_low]));
   if (this->right_chop != NULL && hardclip_high == 0) {
-    FPRINTF(fp,"%.*s",this->right_choplength,this->right_chop);
+    FPRINTF(fp,"%s",this->right_chop);
   }
-#endif
 
   return;
 }
@@ -4837,14 +4826,6 @@ Shortread_print_hardclipped_sam (Filestring_T fp, T this, int hardclip_low, int
 void
 Shortread_print_hardclipped_revcomp_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
 
-#ifdef PRINT_INDIVIDUAL_CHARS
-  FPRINTF(fp,"\t");
-  for (i = this->fulllength - 1 - hardclip_low; i >= hardclip_high; --i) {
-    FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
-  }
-#else
-  FPRINTF(fp,"\t");
-
   /* Hardclip on same side as chop includes choplength */
   if (this->right_chop != NULL && hardclip_low > 0) {
     hardclip_low -= this->right_choplength;
@@ -4853,14 +4834,14 @@ Shortread_print_hardclipped_revcomp_sam (Filestring_T fp, T this, int hardclip_l
     hardclip_high -= this->left_choplength;
   }
 
+  FPRINTF(fp,"\t");
   if (this->right_chop != NULL && hardclip_low == 0) {
-    FPRINTF(fp,"%.*R",this->right_choplength,this->right_chop);
+    FPRINTF(fp,"%R",this->right_chop);
   }
   FPRINTF(fp,"%.*R",this->fulllength - hardclip_high - hardclip_low,&(this->contents[hardclip_high]));
   if (this->left_chop != NULL && hardclip_high == 0) {
-    FPRINTF(fp,"%.*R",this->left_choplength,this->left_chop);
+    FPRINTF(fp,"%R",this->left_chop);
   }
-#endif
 
   return;
 }
@@ -4876,45 +4857,52 @@ Shortread_print_hardclipped_quality (Filestring_T fp, T this, int hardclip_low,
   if (this->quality == NULL) {
     FPRINTF(fp,"\t*");
 
-  } else if (shift == 0) {
-    FPRINTF(fp,"\t");
-    if (this->left_chop_quality != NULL && hardclip_low == 0) {
-      FPRINTF(fp,"%.*s",this->left_choplength,this->left_chop_quality);
+  } else {
+    if (this->left_chop_quality != NULL && hardclip_low > 0) {
+      hardclip_low -= this->left_choplength;
     }
-    FPRINTF(fp,"%.*s",this->fulllength - hardclip_high - hardclip_low,&(this->quality[hardclip_low]));
-    if (this->right_chop_quality != NULL && hardclip_high == 0) {
-      FPRINTF(fp,"%.*s",this->right_choplength,this->right_chop_quality);
+    if (this->right_chop_quality != NULL && hardclip_high > 0) {
+      hardclip_high -= this->right_choplength;
     }
 
-  } else {
-    FPRINTF(fp,"\t");
-    if (this->left_chop_quality != NULL && hardclip_low == 0) {
-      for (i = 0; i < this->left_choplength; i++) {
-	if ((c = this->left_chop_quality[i] + shift) <= 32) {
-	  fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable.  May need to specify --quality-protocol or --quality-print-shift\n",
-		  shift,this->left_chop_quality[i]);
+    if (shift == 0) {
+      FPRINTF(fp,"\t");
+      if (this->left_chop_quality != NULL && hardclip_low == 0) {
+	FPRINTF(fp,"%s",this->left_chop_quality);
+      }
+      FPRINTF(fp,"%.*s",this->fulllength - hardclip_high - hardclip_low,&(this->quality[hardclip_low]));
+      if (this->right_chop_quality != NULL && hardclip_high == 0) {
+	FPRINTF(fp,"%s",this->right_chop_quality);
+      }
+
+    } else {
+      FPRINTF(fp,"\t");
+      if (this->left_chop_quality != NULL && hardclip_low == 0) {
+	for (i = 0; i < this->left_choplength; i++) {
+	  if ((c = this->left_chop_quality[i] + shift) <= 32) {
+	    abort();
+	  } else {
+	    FPRINTF(fp,"%c",c);
+	  }
+	}
+      }
+
+      for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
+	if ((c = this->quality[i] + shift) <= 32) {
 	  abort();
 	} else {
 	  FPRINTF(fp,"%c",c);
 	}
       }
-    }
-    for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
-      if ((c = this->quality[i] + shift) <= 32) {
-	fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable.  May need to specify --quality-protocol or --quality-print-shift\n",
-		shift,this->left_chop_quality[i]);
-	abort();
-      } else {
-	FPRINTF(fp,"%c",c);
-      }
-    }
-    if (this->right_chop_quality != NULL && hardclip_high == 0) {
-      if ((c = this->right_chop_quality[i] + shift) <= 32) {
-	fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable.  May need to specify --quality-protocol or --quality-print-shift\n",
-		shift,this->right_chop_quality[i]);
-	abort();
-      } else {
-	FPRINTF(fp,"%c",c);
+
+      if (this->right_chop_quality != NULL && hardclip_high == 0) {
+	for (i = 0; i < this->right_choplength; i++) {
+	  if ((c = this->right_chop_quality[i] + shift) <= 32) {
+	    abort();
+	  } else {
+	    FPRINTF(fp,"%c",c);
+	  }
+	}
       }
     }
   }
@@ -4933,48 +4921,54 @@ Shortread_print_hardclipped_reverse_quality (Filestring_T fp, T this, int hardcl
   if (this->quality == NULL) {
     FPRINTF(fp,"\t*");
 
-  } else if (shift == 0) {
-    FPRINTF(fp,"\t");
-    if (this->right_chop != NULL && hardclip_high == 0) {
-      FPRINTF(fp,"%.*R",this->right_choplength,this->right_chop_quality);
+  } else {
+    /* Hardclip on same side as chop includes choplength */
+    if (this->right_chop_quality != NULL && hardclip_low > 0) {
+      hardclip_low -= this->right_choplength;
     }
-    FPRINTF(fp,"%.*R",this->fulllength - hardclip_high - hardclip_low,&(this->quality[hardclip_high]));
-    if (this->left_chop != NULL && hardclip_low == 0) {
-      FPRINTF(fp,"%.*R",this->left_choplength,this->left_chop_quality);
+    if (this->left_chop_quality != NULL && hardclip_high > 0) {
+      hardclip_high -= this->left_choplength;
     }
 
-  } else {
-    FPRINTF(fp,"\t");
-    if (this->right_chop != NULL && hardclip_high == 0) {
-      for (i = this->right_choplength - 1; i >= 0; --i) {
-	if ((c = this->right_chop_quality[i] + shift) <= 32) {
-	  fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable.  May need to specify --quality-protocol or --quality-print-shift\n",
-		  shift,this->right_chop_quality[i]);
-	  abort();
-	} else {
-	  FPRINTF(fp,"%c",c);
-	}
+    if (shift == 0) {
+      FPRINTF(fp,"\t");
+      if (this->right_chop != NULL && hardclip_low == 0) {
+	FPRINTF(fp,"%r",this->right_chop_quality);
       }
-    }
-    for (i = this->fulllength - hardclip_high - 1; i >= hardclip_low; --i) {
-      if ((c = this->quality[i] + shift) <= 32) {
-	fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable.  May need to specify --quality-protocol or --quality-print-shift\n",
-		shift,this->left_chop_quality[i]);
-	abort();
-      } else {
-	FPRINTF(fp,"%c",c);
+      FPRINTF(fp,"%.*r",this->fulllength - hardclip_high - hardclip_low,&(this->quality[hardclip_high]));
+      if (this->left_chop != NULL && hardclip_high == 0) {
+	FPRINTF(fp,"%r",this->left_chop_quality);
       }
-    }
-    if (this->left_chop_quality != NULL && hardclip_low == 0) {
-      for (i = this->left_choplength - 1; i >= 0; --i) {
-	if ((c = this->left_chop_quality[i] + shift) <= 32) {
-	  fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable.  May need to specify --quality-protocol or --quality-print-shift\n",
-		  shift,this->left_chop_quality[i]);
+
+    } else {
+      FPRINTF(fp,"\t");
+      if (this->right_chop_quality != NULL && hardclip_low == 0) {
+	for (i = this->right_choplength - 1; i >= 0; --i) {
+	  if ((c = this->right_chop_quality[i] + shift) <= 32) {
+	    abort();
+	  } else {
+	    FPRINTF(fp,"%c",c);
+	  }
+	}
+      }
+
+      for (i = this->fulllength - hardclip_low - 1; i >= hardclip_high; --i) {
+	if ((c = this->quality[i] + shift) <= 32) {
 	  abort();
 	} else {
 	  FPRINTF(fp,"%c",c);
 	}
       }
+
+      if (this->left_chop_quality != NULL && hardclip_high == 0) {
+	for (i = this->left_choplength - 1; i >= 0; --i) {
+	  if ((c = this->left_chop_quality[i] + shift) <= 32) {
+	    abort();
+	  } else {
+	    FPRINTF(fp,"%c",c);
+	  }
+	}
+      }
     }
   }
 
@@ -5018,7 +5012,7 @@ Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low,
       FPRINTF(fp,"%.*R",hardclip_low,&(this->contents[this->fulllength - hardclip_low]));
     } else {
       hardclip_low -= this->right_choplength;
-      FPRINTF(fp,"%.*R",this->right_choplength,this->right_chop);
+      FPRINTF(fp,"%R",this->right_chop);
       FPRINTF(fp,"%.*R",hardclip_low,&(this->contents[this->fulllength - hardclip_low]));
     }
   } else {
@@ -5027,7 +5021,7 @@ Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low,
     } else {
       hardclip_high -= this->left_choplength;
       FPRINTF(fp,"%.*R",hardclip_high,&(this->contents[0]));
-      FPRINTF(fp,"%.*R",this->left_choplength,this->left_chop);
+      FPRINTF(fp,"%R",this->left_chop);
     }
   }
 
@@ -5037,62 +5031,141 @@ Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low,
 
 /* For samprint XI field and print_fusion */
 void
-Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
-#ifdef PRINT_INDIVIDUAL_CHARS
+Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
+				     int shift) {
   int i;
-#endif
+  int c;
 
   if (this->quality == NULL) {
     FPRINTF(fp,"*");
     return;
 
   } else if (hardclip_low > 0) {
-#ifdef PRINT_INDIVIDUAL_CHARS
-    for (i = 0; i < hardclip_low; i++) {
-      FPRINTF(fp,"%c",this->quality[i]);
+    if (shift == 0) {
+      if (this->left_choplength == 0) {
+	FPRINTF(fp,"%.*s",hardclip_low,&(this->quality[0]));
+      } else {
+	hardclip_low -= this->left_choplength;
+	FPRINTF(fp,"%s",this->left_chop_quality);
+	FPRINTF(fp,"%.*s",hardclip_low,&(this->quality[0]));
+      }
+
+    } else {
+      hardclip_low -= this->left_choplength;
+      for (i = 0; i < this->left_choplength; i++) {
+	if ((c = this->left_chop_quality[i] + shift) <= 32) {
+	  abort();
+	} else {
+	  FPRINTF(fp,"%c",c);
+	}
+      }
+      for (i = 0; i < hardclip_low; i++) {
+	if ((c = this->quality[i] + shift) <= 32) {
+	  abort();
+	} else {
+	  FPRINTF(fp,"%c",c);
+	}
+      }
     }
-#else
-    FPRINTF(fp,"%.*s",hardclip_low,&(this->quality[0]));
-#endif
-    return;
 
   } else {
-#ifdef PRINT_INDIVIDUAL_CHARS
-    for (i = this->fulllength - hardclip_high; i < this->fulllength; i++) {
-      FPRINTF(fp,"%c",this->quality[i]);
+    if (shift == 0) {
+      if (this->right_choplength == 0) {
+	FPRINTF(fp,"%.*s",hardclip_high,&(this->quality[this->fulllength - hardclip_high]));
+      } else {
+	hardclip_high -= this->right_choplength;
+	FPRINTF(fp,"%.*s",hardclip_high,&(this->quality[this->fulllength - hardclip_high]));
+	FPRINTF(fp,"%s",this->right_chop_quality);
+      }
+    } else {
+      hardclip_high -= this->right_choplength;
+      for (i = this->fulllength - hardclip_high; i < this->fulllength; i++) {
+	if ((c = this->quality[i] + shift) <= 32) {
+	  abort();
+	} else {
+	  FPRINTF(fp,"%c",c);
+	}
+      }
+      for (i = 0; i < this->right_choplength; i++) {
+	if ((c = this->right_chop_quality[i] + shift) <= 32) {
+	  abort();
+	} else {
+	  FPRINTF(fp,"%c",c);
+	}
+      }
     }
-#else
-    FPRINTF(fp,"%.*s",hardclip_high,&(this->quality[this->fulllength - hardclip_high]));
-#endif
-    return;
   }
+
+  return;
 }
 
+
 /* For samprint XI field and print_fusion */
 void
-Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
+Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
+					     int shift) {
+  int i;
+  int c;
 
   if (this->quality == NULL) {
     FPRINTF(fp,"*");
     return;
 
   } else if (hardclip_low > 0) {
-#ifdef PRINT_INDIVIDUAL_CHARS
-    for (i = this->fulllength - 1; i >= this->fulllength - hardclip_low; --i) {
-      FPRINTF(fp,"%c",this->quality[i]);
+    if (shift == 0) {
+      if (this->right_choplength == 0) {
+	FPRINTF(fp,"%.*r",hardclip_low,&(this->quality[this->fulllength - hardclip_low]));
+      } else {
+	hardclip_low -= this->right_choplength;
+	FPRINTF(fp,"%r",this->right_chop_quality);
+	FPRINTF(fp,"%.*r",hardclip_low,&(this->quality[this->fulllength - hardclip_low]));
+      }
+
+    } else {
+      hardclip_low -= this->right_choplength;
+      for (i = this->right_choplength - 1; i >= 0; --i) {
+	if ((c = this->right_chop_quality[i] + shift) <= 32) {
+	  abort();
+	} else {
+	  FPRINTF(fp,"%c",c);
+	}
+      }
+      for (i = this->fulllength - 1; i >= this->fulllength - hardclip_low; --i) {
+	if ((c = this->quality[i] + shift) <= 32) {
+	  abort();
+	} else {
+	  FPRINTF(fp,"%c",c);
+	}
+      }
     }
-#else
-    FPRINTF(fp,"%.*r",hardclip_low,&(this->quality[this->fulllength - hardclip_low]));
-#endif
 
   } else {
-#ifdef PRINT_INDIVIDUAL_CHARS
-    for (i = hardclip_high - 1; i >= 0; --i) {
-      FPRINTF(fp,"%c",this->quality[i]);
+    if (shift == 0) {
+      if (this->left_choplength == 0) {
+	FPRINTF(fp,"%.*r",hardclip_high,&(this->quality[0]));
+      } else {
+	hardclip_high -= this->left_choplength;
+	FPRINTF(fp,"%.*r",hardclip_high,&(this->quality[0]));
+	FPRINTF(fp,"%r",this->left_chop_quality);
+      }
+
+    } else {
+      hardclip_high -= this->left_choplength;
+      for (i = hardclip_high - 1; i >= 0; --i) {
+	if ((c = this->quality[i] + shift) <= 32) {
+	  abort();
+	} else {
+	  FPRINTF(fp,"%c",c);
+	}
+      }
+      for (i = this->left_choplength - 1; i >= 0; --i) {
+	if ((c = this->left_chop_quality[i] + shift) <= 32) {
+	  abort();
+	} else {
+	  FPRINTF(fp,"%c",c);
+	}
+      }
     }
-#else
-    FPRINTF(fp,"%.*r",hardclip_high,&(this->quality[0]));
-#endif
   }
 
   return;


=====================================
src/shortread.h
=====================================
@@ -201,9 +201,11 @@ Shortread_print_chopped_end (Filestring_T fp, T this, int hardclip_low, int hard
 extern void
 Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
 extern void
-Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
+Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
+				     int shift);
 extern void
-Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
+Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
+					     int shift);
 
 extern void
 Shortread_print_barcode (Filestring_T fp, T this);


=====================================
src/spliceends.c
=====================================
@@ -380,7 +380,10 @@ novel_spliceends_trim5_sense (double *max_prob, bool *partnerp, T this,
     mismatchi--;
   }
 
-  if (plusp) {
+  if (low_univdiagonal > high_univdiagonal) {
+    /* Skip */
+
+  } else if (plusp) {
     debug1(printf("novel_spliceends_trim5_sense, plus\n"));
 
     /* Region from start_genomicpos to middle_genomicpos: require a partner */
@@ -972,7 +975,10 @@ novel_spliceends_trim5_anti (double *max_prob, bool *partnerp, T this,
     mismatchi--;
   }
 
-  if (plusp) {
+  if (low_univdiagonal > high_univdiagonal) {
+    /* Skip */
+
+  } else if (plusp) {
     debug1(printf("novel_spliceends_trim5_anti, plus\n"));
 
     /* Region from start_genomicpos to middle_genomicpos: require a partner */
@@ -1802,7 +1808,10 @@ novel_spliceends_trim3_sense (double *max_prob, bool *partnerp, T this,
     mismatchi--;
   }
 
-  if (plusp) {
+  if (low_univdiagonal > high_univdiagonal) {
+    /* Skip */
+
+  } else if (plusp) {
     debug1(printf("novel_spliceends_trim3_sense, plus\n"));
 
     /* Region from start_genomicpos to middle_genomicpos: require a partner */
@@ -2401,7 +2410,10 @@ novel_spliceends_trim3_anti (double *max_prob, bool *partnerp, T this,
     mismatchi--;
   }
 
-  if (plusp) {
+  if (low_univdiagonal > high_univdiagonal) {
+    /* Skip */
+
+  } else if (plusp) {
     debug1(printf("novel_spliceends_trim3_anti, plus\n"));
 
     /* Region from start_genomicpos to middle_genomicpos: require a partner */


=====================================
src/stage3.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 225447 2022-12-12 21:53:32Z twu $";
+static char rcsid[] = "$Id$";
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
@@ -9346,7 +9346,8 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
 		     Genome_T genome, Genome_T genomealt, Pairpool_T pairpool,
 		     Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
 		     Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
-		     int maxpeelback, double defect_rate, bool finalp, bool simplep) {
+		     int maxpeelback, double defect_rate, bool finalp, bool simplep,
+		     bool allow_microexon_p) {
   List_T gappairs, peeled_pairs = NULL, peeled_path = NULL, p;
   Pair_T pair;
   int queryjump, genomejump;
@@ -9641,6 +9642,11 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
       *path = Pairpool_transfer(*path,peeled_path);
       introntype = NONINTRON;
 
+    } else if (allow_microexon_p == false) {
+      *filledp = true;
+      debug(printf("Microexons disallowed, so using the given solution\n"));
+      pairs = Pairpool_transfer(pairs,gappairs);
+
     } else {
       *filledp = true;
 
@@ -9679,7 +9685,7 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
 							/*new_leftprob*/left_prob,/*new_rightprob*/right_prob,
 							genome,genomealt,chrnum,chroffset,chrhigh,
 							cdna_direction,watsonp) == false) {
-	debug(printf("Shift is worse, so going back to original\n"));
+	debug(printf("Shift is worse, so going back to original, which introduces a dual break\n"));
 	pairs = Pairpool_transfer(pairs,peeled_pairs);
 	*path = Pairpool_transfer(*path,peeled_path);
 	
@@ -11504,7 +11510,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
 				      queryseq_ptr,queryuc_ptr,querylength,
 				      cdna_direction,watsonp,genestrand,jump_late_p,
 				      genome,genomealt,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,
-				      maxpeelback,defect_rate,/*finalp*/false,simplep);
+				      maxpeelback,defect_rate,/*finalp*/false,simplep,/*allow_microexon_p*/true);
 
 	} else {
 	  debug(printf("  Solving as a dual break\n"));
@@ -12147,7 +12153,7 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
 				  queryseq_ptr,queryuc_ptr,querylength,
 				  cdna_direction,watsonp,genestrand,jump_late_p,
 				  genome,genomealt,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,
-				  maxpeelback,defect_rate,finalp,simplep);
+				  maxpeelback,defect_rate,finalp,simplep,/*allow_microexon_p*/true);
       /* Previously had forcep == true, because previously thought that adding large gap is not a good solution */
 
       if (filledp == true) {
@@ -18219,6 +18225,7 @@ Stage3_merge_local (T old_left, T old_right,
 				     genome,genomealt,pairpool,dynprogL,dynprogM,dynprogR,maxpeelback);
 
     } else {
+      /* Do not look for microexons, since they can introduce a dual break that is not resolved by this procedure */
       debug10(printf("traverse_genome_gap with cdna_direction %d...",cdna_direction));
       new_right->pairs = traverse_genome_gap(&filledp,&shiftp,&dynprogindex_minor,&dynprogindex_major,
 					     new_right->pairs,&path,leftpair,rightpair,
@@ -18227,7 +18234,8 @@ Stage3_merge_local (T old_left, T old_right,
 					     watsonp,new_right->genestrand,
 					     /*jump_late_p*/watsonp ? false : true,genome,genomealt,pairpool,
 					     dynprogL,dynprogM,dynprogR,/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
-					     maxpeelback,/*defect_rate*/0,/*finalp*/true,/*simplep*/false);
+					     maxpeelback,/*defect_rate*/0,/*finalp*/true,/*simplep*/false,
+					     /*allow_microexon_p*/false);
       debug10(printf("done"));
       
       if (filledp == false) {



View it on GitLab: https://salsa.debian.org/med-team/gmap/-/commit/776a1a5b7c7ef337675249b9938f4e0a1455d66b

-- 
View it on GitLab: https://salsa.debian.org/med-team/gmap/-/commit/776a1a5b7c7ef337675249b9938f4e0a1455d66b
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20231129/8db4f51c/attachment-0001.htm>


More information about the debian-med-commit mailing list