[med-svn] [Git][med-team/gmap][upstream] New upstream version 2021-02-22+ds

Michael R. Crusoe gitlab at salsa.debian.org
Fri Feb 26 08:51:12 GMT 2021



Michael R. Crusoe pushed to branch upstream at Debian Med / gmap


Commits:
5a9a58d3 by Michael R. Crusoe at 2021-02-26T08:15:13+01:00
New upstream version 2021-02-22+ds
- - - - -


5 changed files:

- ChangeLog
- VERSION
- src/samprint.c
- src/stage3hr.c
- src/substring.c


Changes:

=====================================
ChangeLog
=====================================
@@ -1,3 +1,17 @@
+2021-02-23  twu
+
+    * VERSION, index.html: Updated version number
+
+2021-02-22  twu
+
+    * stage3hr.c: In removing overlaps, handling the case where hitlist or
+      hitpairlist is NULL.  Using low and high instead of genomicstart and
+      genomicend to compute insertlength
+
+    * substring.c: Added commented code
+
+    * samprint.c: Using pairtype for concordant pairs
+
 2021-02-14  twu
 
     * index.html: Updated for latest version


=====================================
VERSION
=====================================
@@ -1 +1 @@
-2021-02-12
\ No newline at end of file
+2021-02-22
\ No newline at end of file


=====================================
src/samprint.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 223774 2020-12-14 19:16:04Z twu $";
+static char rcsid[] = "$Id: samprint.c 223817 2021-02-23 07:37:42Z twu $";
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
@@ -1351,7 +1351,8 @@ SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev, Stage3pai
   Hittype_T hittype;
   Substring_T donor, acceptor;
 
-  debug(printf("Entered SAM_print of hit %p with hittype %s\n",this,Stage3end_hittype_string(this)));
+  debug(printf("Entered SAM_print of hit %p with hittype %s and abbrev %s\n",
+	       this,Stage3end_hittype_string(this),abbrev));
 
   /* Test for nomapping was chrpos == 0, but we can actually align to chrpos 0 */
   /* Also, can use this test here because --quiet-if-excessive cases go directly to SAM_print_nomapping */
@@ -1506,15 +1507,38 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	if (clip_overlap_p == false && merge_overlap_p == false) {
 	  clipdir = 0;
 	  hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
-	  Filestring_set_split_output(fp,OUTPUT_CU);
-	  abbrev = ABBREV_CONCORDANT_UNIQ;
+	  if ((pairtype = Stage3pair_determine_pairtype(stage3pair)) == PAIRED_SCRAMBLE) {
+	    Filestring_set_split_output(fp,OUTPUT_PS);
+	    abbrev = ABBREV_PAIRED_UNIQ_SCR;
+	  } else if (pairtype == PAIRED_INVERSION) {
+	    Filestring_set_split_output(fp,OUTPUT_PI);
+	    abbrev = ABBREV_PAIRED_UNIQ_INV;
+	  } else if (pairtype == PAIRED_TOOLONG) {
+	    Filestring_set_split_output(fp,OUTPUT_PL);
+	    abbrev = ABBREV_PAIRED_UNIQ_LONG;
+	  } else {
+	    Filestring_set_split_output(fp,OUTPUT_CU);
+	    abbrev = ABBREV_CONCORDANT_UNIQ;
+	  }
 	  
 	} else {
 	  clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
 	  debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
 			clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
 	  Filestring_set_split_output(fp,OUTPUT_CU);
-	  abbrev = ABBREV_CONCORDANT_UNIQ;
+	  if ((pairtype = Stage3pair_determine_pairtype(stage3pair)) == PAIRED_SCRAMBLE) {
+	    Filestring_set_split_output(fp,OUTPUT_PS);
+	    abbrev = ABBREV_PAIRED_UNIQ_SCR;
+	  } else if (pairtype == PAIRED_INVERSION) {
+	    Filestring_set_split_output(fp,OUTPUT_PI);
+	    abbrev = ABBREV_PAIRED_UNIQ_INV;
+	  } else if (pairtype == PAIRED_TOOLONG) {
+	    Filestring_set_split_output(fp,OUTPUT_PL);
+	    abbrev = ABBREV_PAIRED_UNIQ_LONG;
+	  } else {
+	    Filestring_set_split_output(fp,OUTPUT_CU);
+	    abbrev = ABBREV_CONCORDANT_UNIQ;
+	  }
 	}
       }
 
@@ -1706,7 +1730,21 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 
       } else {
 	/* Stage3pair_eval(stage3pairarray,npaths,maxpaths_report,queryseq1,queryseq2); */
-	Filestring_set_split_output(fp,OUTPUT_CM);
+	stage3pair = stage3pairarray[0];
+	if ((pairtype = Stage3pair_determine_pairtype(stage3pair)) == PAIRED_SCRAMBLE) {
+	  Filestring_set_split_output(fp,OUTPUT_PM);
+	  abbrev = ABBREV_PAIRED_MULT;
+	} else if (pairtype == PAIRED_INVERSION) {
+	  Filestring_set_split_output(fp,OUTPUT_PM);
+	  abbrev = ABBREV_PAIRED_MULT;
+	} else if (pairtype == PAIRED_TOOLONG) {
+	  Filestring_set_split_output(fp,OUTPUT_PM);
+	  abbrev = ABBREV_PAIRED_MULT;
+	} else {
+	  Filestring_set_split_output(fp,OUTPUT_CM);
+	  abbrev = ABBREV_CONCORDANT_MULT;
+	}
+
 	for (pathnum = 1; pathnum <= npaths_primary + npaths_altloc && pathnum <= maxpaths_report; pathnum++) {
 
 	  stage3pair = stage3pairarray[pathnum-1];
@@ -1737,7 +1775,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 
 	  if (merge_overlap_p == false || clipdir == 0) {
 	    /* print first end */
-	    SAM_print(fp,fp_failedinput_1,ABBREV_CONCORDANT_MULT,stage3pair,
+	    SAM_print(fp,fp_failedinput_1,abbrev,stage3pair,
 		      hit5,querylength5,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
 		      Stage3pair_mapq_score(stage3pair),chromosome_iit,
@@ -1749,7 +1787,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
 	    /* print second end */
-	    SAM_print(fp,fp_failedinput_2,ABBREV_CONCORDANT_MULT,stage3pair,
+	    SAM_print(fp,fp_failedinput_2,abbrev,stage3pair,
 		      hit3,querylength3,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
 		      Stage3pair_mapq_score(stage3pair),chromosome_iit,
@@ -1789,7 +1827,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 				Stage3pair_absmq_score(stage3pair),first_absmq,/*invertp*/false,
 				/*invert_mate_p*/false,/*supplementaryp*/false);
 
-	    Simplepair_overlap_print_sam(fp,ABBREV_CONCORDANT_MULT,pairarray,npairs,
+	    Simplepair_overlap_print_sam(fp,abbrev,pairarray,npairs,
 					 acc1,/*acc2*/NULL,Stage3end_chrnum(hit5),chromosome_iit,
 					 /*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
 					 /*hardclip_low*/0,/*hardclip_high*/0,/*querylength*/querylength_merged,


=====================================
src/stage3hr.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 223808 2021-02-13 18:47:25Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 223819 2021-02-23 07:39:23Z twu $";
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
@@ -6537,7 +6537,7 @@ pair_insert_length (int *pair_relationship, Stage3end_T hit5, Stage3end_T hit3)
 
   /* No overlap found between any combination of substrings */
   if (hit5->plusp == true) {
-    if (hit5->genomicend > hit3->genomicstart + hit5->querylength + hit3->querylength) {
+    if (hit5->high > hit3->low + hit5->querylength + hit3->querylength) {
       debug10(printf("pair_insert_length: no overlap found, and %u - %u + %d + %d < 0, so returning 0\n",
 		     hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset,
 		     hit5->querylength,hit3->querylength));
@@ -6549,10 +6549,10 @@ pair_insert_length (int *pair_relationship, Stage3end_T hit5, Stage3end_T hit3)
 		     hit5->querylength,hit3->querylength));
     }
     *pair_relationship = +1;
-    return hit3->genomicstart - hit5->genomicend + hit5->querylength + hit3->querylength;
+    return hit3->low - hit5->high + hit5->querylength + hit3->querylength;
 
   } else {
-    if (hit3->genomicstart > hit5->genomicend + hit5->querylength + hit3->querylength) {
+    if (hit3->high > hit5->low + hit5->querylength + hit3->querylength) {
       debug10(printf("pair_insert_length: no overlap found, and %u - %u + %d + %d < 0, so returning 0\n",
 		     hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset,
 		     hit5->querylength,hit3->querylength));
@@ -6563,7 +6563,7 @@ pair_insert_length (int *pair_relationship, Stage3end_T hit5, Stage3end_T hit3)
 		     hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset,
 		     hit5->querylength,hit3->querylength));
       *pair_relationship = -1;
-      return hit5->genomicend - hit3->genomicstart + hit5->querylength + hit3->querylength;
+      return hit5->low - hit3->high + hit5->querylength + hit3->querylength;
     }
   }
 }
@@ -7057,9 +7057,11 @@ Stage3end_optimal_score_final_old (bool *eliminatedp, List_T hitlist, Hitlistpoo
     }
   }
   Hitlist_free(&hitlist);
-  hitlist = optimal;
-  optimal = (List_T) NULL;
-
+  if ((hitlist = optimal) == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
 
   /* Prune based on ref_nmatches_plus_spliced_trims (to get the splice ends) */
   max_nmatches = 0;
@@ -7133,8 +7135,11 @@ Stage3end_optimal_score_final_old (bool *eliminatedp, List_T hitlist, Hitlistpoo
     }
   }
   Hitlist_free(&hitlist);
-  hitlist = optimal;
-  optimal = (List_T) NULL;
+  if ((hitlist = optimal) == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
 
 
   /* Eliminate within loci (1): refalt_nmatches_to_trims only */
@@ -7195,9 +7200,12 @@ Stage3end_optimal_score_final_old (bool *eliminatedp, List_T hitlist, Hitlistpoo
   }
   FREE(hits);
   FREE(eliminate);
-  hitlist = optimal;
-  optimal = (List_T) NULL;
-
+  if ((hitlist = optimal) == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
+  
 
   /* Eliminate within loci (2): nsegments and splice score */
   keptp = false;
@@ -9426,7 +9434,12 @@ Stage3end_remove_overlaps (List_T hitlist, Hitlistpool_T hitlistpool, int queryl
   /* Prune based on nmatches adjusted by score to get a tradeoff between matches and parsimony */
   /* Same as step 1 of Stage3pair_optimal_score_final */
   debug8(printf("  Step 3.  Maximize nmatches adjusted by score (with slop)\n"));
-  optimal = (List_T) NULL;
+  if (hitlist == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
+  
 
   keptp = false;
   hits = (T *) List_to_array_n(&n,hitlist);
@@ -9493,13 +9506,16 @@ Stage3end_remove_overlaps (List_T hitlist, Hitlistpool_T hitlistpool, int queryl
   }
   FREE(hits);
   FREE(eliminate);
-  hitlist = optimal;
-
+  if ((hitlist = optimal) == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
+  
 
   /* Eliminate within loci: minimize nsegments and maximize splice score */
   /* Since we have achieved same number of matches, we should minimize nsegments to achieve parsimony */
   debug4(printf("  Step 4.  Minimize nsegments and splice score\n"));
-  optimal = (List_T) NULL;
 
   keptp = false;
   hits = (T *) List_to_array_n(&n,hitlist);
@@ -11469,11 +11485,11 @@ Stage3pair_new (T hit5_orig, T hit3_orig, int genestrand, int sensedir, Pairtype
     /*   or 3-end..start and 5-start..end */
 
     new->pair_relationship = 0;
-    if (hit5->genomicend < hit3->genomicend) {
-      new->insertlength = (hit3->genomicend - hit5->genomicend) + querylength5 + querylength3;
+    if (hit5->high < hit3->low) {
+      new->insertlength = (hit3->low - hit5->high) + querylength5 + querylength3;
       new->insertlength_expected_sign = insertlength_expected(new->insertlength);
-    } else if (hit3->genomicstart < hit5->genomicstart) {
-      new->insertlength = (hit5->genomicstart - hit3->genomicstart) + querylength5 + querylength3;
+    } else if (hit3->high < hit5->low) {
+      new->insertlength = (hit5->low - hit3->high) + querylength5 + querylength3;
       new->insertlength_expected_sign = insertlength_expected(new->insertlength);
     } else {
       new->insertlength = pair_insert_length_unpaired(hit5,hit3); /* was 0 */
@@ -11488,11 +11504,11 @@ Stage3pair_new (T hit5_orig, T hit3_orig, int genestrand, int sensedir, Pairtype
     /*   or 3-start..end and 5-end..start */
 
     new->pair_relationship = 0;
-    if (hit5->genomicstart < hit3->genomicstart) {
-      new->insertlength = (hit3->genomicstart - hit5->genomicstart) + querylength5 + querylength3;
+    if (hit5->high < hit3->low) {
+      new->insertlength = (hit3->low - hit5->high) + querylength5 + querylength3;
       new->insertlength_expected_sign = insertlength_expected(new->insertlength);
-    } else if (hit3->genomicend < hit5->genomicend) {
-      new->insertlength = (hit5->genomicend - hit3->genomicend) + querylength5 + querylength3;
+    } else if (hit3->high < hit5->low) {
+      new->insertlength = (hit5->high - hit3->low) + querylength5 + querylength3;
       new->insertlength_expected_sign = insertlength_expected(new->insertlength);
     } else {
       new->insertlength = pair_insert_length_unpaired(hit5,hit3); /* was 0 */
@@ -11609,10 +11625,10 @@ Stage3pair_new (T hit5_orig, T hit3_orig, int genestrand, int sensedir, Pairtype
     }
 
     /* Have 5-start..end and 3-start..end */
-    if (hit5->genomicend < hit3->genomicstart) {
+    if (hit5->high < hit3->low) {
       /* No overlap */
       new->pair_relationship = +1;
-      new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+      new->insertlength = (hit3->low - hit5->high) + querylength5 + querylength3;
       new->insertlength_expected_sign = insertlength_expected(new->insertlength);
       debug10(printf("plus, no overlap: insert length %d = start3 %u - end5 %u + %d + %d\n",
 		     new->insertlength,hit3->genomicstart - hit3->chroffset,
@@ -11743,10 +11759,10 @@ Stage3pair_new (T hit5_orig, T hit3_orig, int genestrand, int sensedir, Pairtype
     }
 
     /* Have 3-end..start and 5-end..start */
-    if (hit3->genomicstart < hit5->genomicend) {
+    if (hit3->high < hit5->low) {
       /* No overlap */
       new->pair_relationship = -1;
-      new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+      new->insertlength = (hit5->low - hit3->high) + querylength5 + querylength3;
       new->insertlength_expected_sign = insertlength_expected(new->insertlength);
       debug10(printf("minus, no overlap: insert length %d = end5 %u - start3 %u + %d + %d\n",
 		     new->insertlength,hit5->genomicend - hit5->chroffset,
@@ -13456,7 +13472,12 @@ Stage3pair_remove_overlaps (List_T hitpairlist, Hitlistpool_T hitlistpool,
   /* Prune based on nmatches adjusted by score to get a tradeoff between matches and parsimony */
   /* Same as step 1 of Stage3pair_optimal_score_final */
   debug8(printf("  Step 3.  Maximize nmatches adjusted by score (with slop)\n"));
-  optimal = (List_T) NULL;
+  if (hitpairlist == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
+  
 
   keptp = false;
   hitpairs = (Stage3pair_T *) List_to_array_n(&n,hitpairlist);
@@ -13535,13 +13556,16 @@ Stage3pair_remove_overlaps (List_T hitpairlist, Hitlistpool_T hitlistpool,
   }
   FREE(hitpairs);
   FREE(eliminate);
-  hitpairlist = optimal;
-
+  if ((hitpairlist = optimal) == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
+  
 
   /* Eliminate within loci: minimize nsegments and maximize splice score (for approximately equal insertlengths) */
   /* Since we have achieved same number of matches, we should minimize nsegments to achieve parsimony */
   debug8(printf("  Step 4.  Minimize nsegments and splice score (for approximately equal insertlengths)\n"));
-  optimal = (List_T) NULL;
 
   keptp = false;
   hitpairs = (Stage3pair_T *) List_to_array_n(&n,hitpairlist);
@@ -13644,12 +13668,15 @@ Stage3pair_remove_overlaps (List_T hitpairlist, Hitlistpool_T hitlistpool,
   }
   FREE(hitpairs);
   FREE(eliminate);
-  hitpairlist = optimal;
-
+  if ((hitpairlist = optimal) == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
+  
 
   /* Eliminate within loci: minimize outerlength */
   debug8(printf("  Step 5.  Minimize outerlength\n"));
-  optimal = (List_T) NULL;
 
   keptp = false;
   hitpairs = (Stage3pair_T *) List_to_array_n(&n,hitpairlist);
@@ -14558,8 +14585,11 @@ Stage3pair_optimal_score_final_old (bool *eliminatedp, List_T hitpairlist,
     }
   }
   Hitlist_free(&hitpairlist);
-  hitpairlist = optimal;
-  optimal = (List_T) NULL;
+  if ((hitpairlist = optimal) == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
 
 
   /* Eliminate within loci (1): refalt_nmatches_to_trims_only */
@@ -14629,8 +14659,11 @@ Stage3pair_optimal_score_final_old (bool *eliminatedp, List_T hitpairlist,
   }
   FREE(hitpairs);
   FREE(eliminate);
-  hitpairlist = optimal;
-  optimal = (List_T) NULL;
+  if ((hitpairlist = optimal) == NULL) {
+    return (List_T) NULL;
+  } else {
+    optimal = (List_T) NULL;
+  }
 
 
   /* Eliminate within loci (2): nsegments and splice score */


=====================================
src/substring.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 223553 2020-11-16 01:27:22Z twu $";
+static char rcsid[] = "$Id: substring.c 223818 2021-02-23 07:38:29Z twu $";
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
@@ -1780,11 +1780,13 @@ Substring_new (int nmismatches, int ref_nmismatches, Univcoord_T left,
       new->querystart_chrbound = 0;
     } else {
       new->querystart_chrbound = chroffset - left;
+      /* new->genomicstart += new->querystart_chrbound; */
     }
     if (left + querylength < chrhigh) {
       new->queryend_chrbound = querylength;
     } else {
       new->queryend_chrbound = chrhigh - left;
+      /* new->genomicend -= (querylength - new->queryend_chrbound); */
     }
 
   } else {
@@ -1795,11 +1797,13 @@ Substring_new (int nmismatches, int ref_nmismatches, Univcoord_T left,
       new->querystart_chrbound = 0;
     } else {
       new->querystart_chrbound = (left + querylength) - chrhigh;
+      /* new->genomicstart -= new->querystart_chrbound; */
     }
     if (left + querylength > chroffset + querylength) {
       new->queryend_chrbound = querylength;
     } else {
       new->queryend_chrbound = (left + querylength) - chroffset;
+      /* new->genomicend += (querylength - new->queryend_chrbound); */
     }
   }
 
@@ -2334,11 +2338,13 @@ Substring_new_simple (int nmismatches, Univcoord_T left, int querystart, int que
       new->querystart_chrbound = 0;
     } else {
       new->querystart_chrbound = chroffset - left;
+      /* new->genomicstart += new->querystart_chrbound; */
     }
     if (left + querylength < chrhigh) {
       new->queryend_chrbound = querylength;
     } else {
       new->queryend_chrbound = chrhigh - left;
+      /* new->genomicend -= (querylength - new->queryend_chrbound); */
     }
 
   } else {
@@ -2349,11 +2355,13 @@ Substring_new_simple (int nmismatches, Univcoord_T left, int querystart, int que
       new->querystart_chrbound = 0;
     } else {
       new->querystart_chrbound = (left + querylength) - chrhigh;
+      /* new->genomicstart -= new->querystart_chrbound; */
     }
     if (left + querylength > chroffset + querylength) {
       new->queryend_chrbound = querylength;
     } else {
       new->queryend_chrbound = (left + querylength) - chroffset;
+      /* new->genomicend += (querylength - new->queryend_chrbound); */
     }
   }
 



View it on GitLab: https://salsa.debian.org/med-team/gmap/-/commit/5a9a58d3c47ac031dd4d156009f72f705475b655

-- 
View it on GitLab: https://salsa.debian.org/med-team/gmap/-/commit/5a9a58d3c47ac031dd4d156009f72f705475b655
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210226/5bd9b4fa/attachment-0001.htm>


More information about the debian-med-commit mailing list