[med-svn] [Git][med-team/gmap][upstream] New upstream version 2023-10-10.v2+ds
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Wed Nov 29 22:16:28 GMT 2023
Étienne Mollier pushed to branch upstream at Debian Med / gmap
Commits:
776a1a5b by Étienne Mollier at 2023-11-29T22:56:02+01:00
New upstream version 2023-10-10.v2+ds
- - - - -
7 changed files:
- src/path-fusion.c
- src/path-print-sam.c
- src/path-solve.c
- src/shortread.c
- src/shortread.h
- src/spliceends.c
- src/stage3.c
Changes:
=====================================
src/path-fusion.c
=====================================
@@ -3256,13 +3256,19 @@ Path_fusion_inner_qend (int *found_score, T fusion5, T anchor3,
debug(printf("Looking for qend hit in %d..%d at %u..%u\n",
pos5,querylength,low_univdiagonal,high_univdiagonal));
- new_univdiagonal = Spliceends_qend_resolve(&local_nmismatches,pos5,querylength,
- low_univdiagonal,high_univdiagonal,
- query_compress_main,queryptr_main,main_plusp,genestrand,
- novel_diagonals_alloc,localdb_alloc,
- streamspace_max_alloc,streamspace_alloc,
- streamptr_alloc,streamsize_alloc,mergeinfo,
- nmismatches_allowed);
+ if (low_univdiagonal > high_univdiagonal) {
+ debug(printf("Skipping because %u > %u\n",low_univdiagonal,high_univdiagonal));
+ new_univdiagonal = 0;
+ } else {
+ new_univdiagonal = Spliceends_qend_resolve(&local_nmismatches,pos5,querylength,
+ low_univdiagonal,high_univdiagonal,
+ query_compress_main,queryptr_main,main_plusp,genestrand,
+ novel_diagonals_alloc,localdb_alloc,
+ streamspace_max_alloc,streamspace_alloc,
+ streamptr_alloc,streamsize_alloc,mergeinfo,
+ nmismatches_allowed);
+ }
+
} else {
goal_univdiagonal = add_bounded(Path_genomichigh(anchor3) - anchor3->querylength,
/*expected_pairlength*/30000,anchor3->chrhigh);
@@ -3298,13 +3304,17 @@ Path_fusion_inner_qend (int *found_score, T fusion5, T anchor3,
debug(printf("Looking for qstart hit in 0..%d at %u..%u\n",
pos3,low_univdiagonal,high_univdiagonal));
- new_univdiagonal = Spliceends_qstart_resolve(&local_nmismatches,pos3,querylength,
- low_univdiagonal,high_univdiagonal,
- query_compress_main,queryptr_main,main_plusp,genestrand,
- novel_diagonals_alloc,localdb_alloc,
- streamspace_max_alloc,streamspace_alloc,
- streamptr_alloc,streamsize_alloc,mergeinfo,
- nmismatches_allowed);
+ if (low_univdiagonal > high_univdiagonal) {
+ new_univdiagonal = 0;
+ } else {
+ new_univdiagonal = Spliceends_qstart_resolve(&local_nmismatches,pos3,querylength,
+ low_univdiagonal,high_univdiagonal,
+ query_compress_main,queryptr_main,main_plusp,genestrand,
+ novel_diagonals_alloc,localdb_alloc,
+ streamspace_max_alloc,streamspace_alloc,
+ streamptr_alloc,streamsize_alloc,mergeinfo,
+ nmismatches_allowed);
+ }
}
if (new_univdiagonal == 0) {
@@ -3465,13 +3475,18 @@ Path_fusion_inner_qstart (int *found_score, T fusion3, T anchor5,
debug(printf("Looking for qstart hit in %d..%d at %u..%u\n",
0,pos3,low_univdiagonal,high_univdiagonal));
- new_univdiagonal = Spliceends_qstart_resolve(&local_nmismatches,pos3,querylength,
- low_univdiagonal,high_univdiagonal,
- query_compress_main,queryptr_main,main_plusp,genestrand,
- novel_diagonals_alloc,localdb_alloc,
- streamspace_max_alloc,streamspace_alloc,
- streamptr_alloc,streamsize_alloc,mergeinfo,
- nmismatches_allowed);
+ if (low_univdiagonal > high_univdiagonal) {
+ new_univdiagonal = 0;
+ } else {
+ new_univdiagonal = Spliceends_qstart_resolve(&local_nmismatches,pos3,querylength,
+ low_univdiagonal,high_univdiagonal,
+ query_compress_main,queryptr_main,main_plusp,genestrand,
+ novel_diagonals_alloc,localdb_alloc,
+ streamspace_max_alloc,streamspace_alloc,
+ streamptr_alloc,streamsize_alloc,mergeinfo,
+ nmismatches_allowed);
+ }
+
} else {
goal_univdiagonal = subtract_bounded(Path_genomiclow(anchor5) + anchor5->querylength + fusion3->querylength,
/*expected_pairlength*/30000,anchor5->chroffset + anchor5->querylength);
@@ -3508,13 +3523,17 @@ Path_fusion_inner_qstart (int *found_score, T fusion3, T anchor5,
debug(printf("Looking for qend hit in %d..%d at %u..%u\n",
pos5,querylength,low_univdiagonal,high_univdiagonal));
- new_univdiagonal = Spliceends_qend_resolve(&local_nmismatches,pos5,querylength,
- low_univdiagonal,high_univdiagonal,
- query_compress_main,queryptr_main,main_plusp,genestrand,
- novel_diagonals_alloc,localdb_alloc,
- streamspace_max_alloc,streamspace_alloc,
- streamptr_alloc,streamsize_alloc,mergeinfo,
- nmismatches_allowed);
+ if (low_univdiagonal > high_univdiagonal) {
+ new_univdiagonal = 0;
+ } else {
+ new_univdiagonal = Spliceends_qend_resolve(&local_nmismatches,pos5,querylength,
+ low_univdiagonal,high_univdiagonal,
+ query_compress_main,queryptr_main,main_plusp,genestrand,
+ novel_diagonals_alloc,localdb_alloc,
+ streamspace_max_alloc,streamspace_alloc,
+ streamptr_alloc,streamsize_alloc,mergeinfo,
+ nmismatches_allowed);
+ }
}
if (new_univdiagonal == 0) {
=====================================
src/path-print-sam.c
=====================================
@@ -894,9 +894,9 @@ print_substrings (int *hardclip_low, int *hardclip_high,
if (Shortread_quality_string(queryseq) != NULL) {
FPRINTF(fp,"\tXI:Z:");
if (path->plusp == true) {
- Shortread_print_chopped_end_quality(fp,queryseq,*hardclip_low,*hardclip_high);
+ Shortread_print_chopped_end_quality(fp,queryseq,*hardclip_low,*hardclip_high,quality_shift);
} else {
- Shortread_print_chopped_end_quality_reverse(fp,queryseq,*hardclip_low,*hardclip_high);
+ Shortread_print_chopped_end_quality_reverse(fp,queryseq,*hardclip_low,*hardclip_high,quality_shift);
}
}
}
@@ -1323,7 +1323,7 @@ print_fusion (Filestring_T fp, char *abbrev, T path,
int pairedlength, int pair_relationship, int hardclip_low, int hardclip_high,
T mate, Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
- char *sam_read_group_id, bool invertp, bool invert_mate_p) {
+ int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
unsigned int flag = 0U;
int hardclip_low_ignore, hardclip_high_ignore;
@@ -1483,13 +1483,13 @@ print_fusion (Filestring_T fp, char *abbrev, T path,
} else {
FPRINTF(fp,"\t");
if (path->plusp == true && path->fusion_plusp == true) {
- Shortread_print_chopped_end_quality(fp,queryseq,hardclip_low,hardclip_high);
+ Shortread_print_chopped_end_quality(fp,queryseq,hardclip_low,hardclip_high,quality_shift);
} else if (path->plusp == true && path->fusion_plusp == false) {
- Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_high,hardclip_low);
+ Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_high,hardclip_low,quality_shift);
} else if (path->plusp == false && path->fusion_plusp == true) {
- Shortread_print_chopped_end_quality(fp,queryseq,hardclip_high,hardclip_low);
+ Shortread_print_chopped_end_quality(fp,queryseq,hardclip_high,hardclip_low,quality_shift);
} else {
- Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_low,hardclip_high);
+ Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_low,hardclip_high,quality_shift);
}
}
}
@@ -1908,7 +1908,7 @@ Path_print_sam (Filestring_T fp, Filestring_T *fp_failedinput, char *abbrev,
queryseq,mate_queryseq,single_cell_infoseq,pairedlength,pair_relationship,
hardclip_low,hardclip_high,mate,
resulttype,first_read_p,artificial_mate_p,
- npaths_mate,sam_read_group_id,invertp,invert_mate_p);
+ npaths_mate,quality_shift,sam_read_group_id,invertp,invert_mate_p);
}
return;
=====================================
src/path-solve.c
=====================================
@@ -246,7 +246,7 @@ attach_indel_qstart_simple (int adj, T path, int indel_pos,
/* Returns a newpath without modifying or deleting path */
static T
attach_indel_qstart (T path, Univcoord_T low_univdiagonal, int low_qstart,
- Univcoord_T chrhigh, int querylength, Indelinfo_T indelinfo,
+ Univcoord_T chroffset, Univcoord_T chrhigh, int querylength, Indelinfo_T indelinfo,
Compress_T query_compress, bool plusp, int genestrand,
int max_insertionlen, int max_deletionlen,
Intlistpool_T intlistpool, Univcoordlistpool_T univcoordlistpool,
@@ -286,6 +286,10 @@ attach_indel_qstart (T path, Univcoord_T low_univdiagonal, int low_qstart,
/* Impossible */
debug13(printf("Impossible\n"));
+ } else if (low_univdiagonal + low_qstart < chroffset + querylength) {
+ debug13(printf("Extends beyond start of chromosome: low_univdiagonal %u - querylength %d + low_qstart %d vs chroffset %u\n",
+ low_univdiagonal,querylength,low_qstart,chroffset));
+
} else if (low_univdiagonal > univdiagonal) {
/* (A) Insertion */
nindels = low_univdiagonal - univdiagonal;
@@ -670,6 +674,10 @@ attach_unknown_qstart (T path, Univcoord_T low_univdiagonal, int low_qstart,
if (low_qstart >= qend) {
debug13(printf("Does not add to start of path: low_qstart %d >= qend %d\n",low_qstart,qend));
+ } else if (low_univdiagonal + low_qstart < chroffset + querylength) {
+ debug13(printf("Extends beyond start of chromosome: low_univdiagonal %u - querylength %d + low_qstart %u vs chroffset %u\n",
+ low_univdiagonal,querylength,low_qstart,chroffset));
+
} else if (low_univdiagonal == univdiagonal) {
if (low_qstart >= Intlist_head(path->endpoints)) {
debug13(printf("Mismatch fails, since new endpoint %d >= old endpoint %d\n",low_qstart,Intlist_head(path->endpoints)));
@@ -1433,6 +1441,10 @@ attach_indel_qend (T path, Univcoord_T high_univdiagonal, int high_qend,
/* Impossible */
debug13(printf("Impossible\n"));
+ } else if (high_univdiagonal + high_qend >= chrhigh + querylength) {
+ debug13(printf("Extends beyond end of chromosome: high_univdiagonal %u - querylength %d + high_qend %d vs chrhigh %u\n",
+ high_univdiagonal,querylength,high_qend,chrhigh));
+
} else if (high_univdiagonal < univdiagonal) {
/* (A) Insertion */
nindels = univdiagonal - high_univdiagonal;
@@ -1829,6 +1841,10 @@ attach_unknown_qend (T path, Univcoord_T high_univdiagonal, int high_qstart, int
if (qstart >= high_qend) {
debug13(printf("Does not add to end of path: qstart %d >= high_qend %d\n",qstart,high_qend));
+ } else if (high_univdiagonal + high_qend >= chrhigh + querylength) {
+ debug13(printf("Extends beyond end of chromosome: high_univdiagonal %u - querylength %d + high_qend %d vs chrhigh %u\n",
+ high_univdiagonal,querylength,high_qend,chrhigh));
+
} else if (high_univdiagonal == univdiagonal) {
if (high_qend <= Intlist_head(path->endpoints)) {
debug13(printf("Mismatch fails, since new endpoint %d <= old endpoint %d\n",high_qend,Intlist_head(path->endpoints)));
@@ -3290,7 +3306,7 @@ compute_qstart_local (List_T *unextended_paths, List_T *complete_paths,
/* Subtract adj to get low diagonal, but add adj to get high diagonal */
if ((newpath = attach_indel_qstart(path,/*low_diagonal*/univdiagonal - adj,/*low_qstart*/trimpos,
- chrhigh,querylength,indelinfo,
+ chroffset,chrhigh,querylength,indelinfo,
query_compress,plusp,genestrand,
max_insertionlen,max_deletionlen,
intlistpool,univcoordlistpool,listpool,pathpool,vectorpool)) != NULL) {
=====================================
src/shortread.c
=====================================
@@ -4801,17 +4801,6 @@ Shortread_print_chopped_revcomp_sam (Filestring_T fp, T this, int hardclip_low,
void
Shortread_print_hardclipped_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
-#ifdef PRINT_INDIVIDUAL_CHARS
- int i;
-#endif
-
-#ifdef PRINT_INDIVIDUAL_CHARS
- FPRINTF(fp,"\t");
- for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
- FPRINTF(fp,"%c",this->contents[i]);
- }
-#else
- FPRINTF(fp,"\t");
/* Hardclip on same side as chop includes choplength */
if (this->left_chop != NULL && hardclip_low > 0) {
@@ -4821,14 +4810,14 @@ Shortread_print_hardclipped_sam (Filestring_T fp, T this, int hardclip_low, int
hardclip_high -= this->right_choplength;
}
+ FPRINTF(fp,"\t");
if (this->left_chop != NULL && hardclip_low == 0) {
- FPRINTF(fp,"%.*s",this->left_choplength,this->left_chop);
+ FPRINTF(fp,"%s",this->left_chop);
}
FPRINTF(fp,"%.*s",this->fulllength - hardclip_high - hardclip_low,&(this->contents[hardclip_low]));
if (this->right_chop != NULL && hardclip_high == 0) {
- FPRINTF(fp,"%.*s",this->right_choplength,this->right_chop);
+ FPRINTF(fp,"%s",this->right_chop);
}
-#endif
return;
}
@@ -4837,14 +4826,6 @@ Shortread_print_hardclipped_sam (Filestring_T fp, T this, int hardclip_low, int
void
Shortread_print_hardclipped_revcomp_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
-#ifdef PRINT_INDIVIDUAL_CHARS
- FPRINTF(fp,"\t");
- for (i = this->fulllength - 1 - hardclip_low; i >= hardclip_high; --i) {
- FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
- }
-#else
- FPRINTF(fp,"\t");
-
/* Hardclip on same side as chop includes choplength */
if (this->right_chop != NULL && hardclip_low > 0) {
hardclip_low -= this->right_choplength;
@@ -4853,14 +4834,14 @@ Shortread_print_hardclipped_revcomp_sam (Filestring_T fp, T this, int hardclip_l
hardclip_high -= this->left_choplength;
}
+ FPRINTF(fp,"\t");
if (this->right_chop != NULL && hardclip_low == 0) {
- FPRINTF(fp,"%.*R",this->right_choplength,this->right_chop);
+ FPRINTF(fp,"%R",this->right_chop);
}
FPRINTF(fp,"%.*R",this->fulllength - hardclip_high - hardclip_low,&(this->contents[hardclip_high]));
if (this->left_chop != NULL && hardclip_high == 0) {
- FPRINTF(fp,"%.*R",this->left_choplength,this->left_chop);
+ FPRINTF(fp,"%R",this->left_chop);
}
-#endif
return;
}
@@ -4876,45 +4857,52 @@ Shortread_print_hardclipped_quality (Filestring_T fp, T this, int hardclip_low,
if (this->quality == NULL) {
FPRINTF(fp,"\t*");
- } else if (shift == 0) {
- FPRINTF(fp,"\t");
- if (this->left_chop_quality != NULL && hardclip_low == 0) {
- FPRINTF(fp,"%.*s",this->left_choplength,this->left_chop_quality);
+ } else {
+ if (this->left_chop_quality != NULL && hardclip_low > 0) {
+ hardclip_low -= this->left_choplength;
}
- FPRINTF(fp,"%.*s",this->fulllength - hardclip_high - hardclip_low,&(this->quality[hardclip_low]));
- if (this->right_chop_quality != NULL && hardclip_high == 0) {
- FPRINTF(fp,"%.*s",this->right_choplength,this->right_chop_quality);
+ if (this->right_chop_quality != NULL && hardclip_high > 0) {
+ hardclip_high -= this->right_choplength;
}
- } else {
- FPRINTF(fp,"\t");
- if (this->left_chop_quality != NULL && hardclip_low == 0) {
- for (i = 0; i < this->left_choplength; i++) {
- if ((c = this->left_chop_quality[i] + shift) <= 32) {
- fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
- shift,this->left_chop_quality[i]);
+ if (shift == 0) {
+ FPRINTF(fp,"\t");
+ if (this->left_chop_quality != NULL && hardclip_low == 0) {
+ FPRINTF(fp,"%s",this->left_chop_quality);
+ }
+ FPRINTF(fp,"%.*s",this->fulllength - hardclip_high - hardclip_low,&(this->quality[hardclip_low]));
+ if (this->right_chop_quality != NULL && hardclip_high == 0) {
+ FPRINTF(fp,"%s",this->right_chop_quality);
+ }
+
+ } else {
+ FPRINTF(fp,"\t");
+ if (this->left_chop_quality != NULL && hardclip_low == 0) {
+ for (i = 0; i < this->left_choplength; i++) {
+ if ((c = this->left_chop_quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
+ }
+
+ for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
+ if ((c = this->quality[i] + shift) <= 32) {
abort();
} else {
FPRINTF(fp,"%c",c);
}
}
- }
- for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
- if ((c = this->quality[i] + shift) <= 32) {
- fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
- shift,this->left_chop_quality[i]);
- abort();
- } else {
- FPRINTF(fp,"%c",c);
- }
- }
- if (this->right_chop_quality != NULL && hardclip_high == 0) {
- if ((c = this->right_chop_quality[i] + shift) <= 32) {
- fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
- shift,this->right_chop_quality[i]);
- abort();
- } else {
- FPRINTF(fp,"%c",c);
+
+ if (this->right_chop_quality != NULL && hardclip_high == 0) {
+ for (i = 0; i < this->right_choplength; i++) {
+ if ((c = this->right_chop_quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
}
}
}
@@ -4933,48 +4921,54 @@ Shortread_print_hardclipped_reverse_quality (Filestring_T fp, T this, int hardcl
if (this->quality == NULL) {
FPRINTF(fp,"\t*");
- } else if (shift == 0) {
- FPRINTF(fp,"\t");
- if (this->right_chop != NULL && hardclip_high == 0) {
- FPRINTF(fp,"%.*R",this->right_choplength,this->right_chop_quality);
+ } else {
+ /* Hardclip on same side as chop includes choplength */
+ if (this->right_chop_quality != NULL && hardclip_low > 0) {
+ hardclip_low -= this->right_choplength;
}
- FPRINTF(fp,"%.*R",this->fulllength - hardclip_high - hardclip_low,&(this->quality[hardclip_high]));
- if (this->left_chop != NULL && hardclip_low == 0) {
- FPRINTF(fp,"%.*R",this->left_choplength,this->left_chop_quality);
+ if (this->left_chop_quality != NULL && hardclip_high > 0) {
+ hardclip_high -= this->left_choplength;
}
- } else {
- FPRINTF(fp,"\t");
- if (this->right_chop != NULL && hardclip_high == 0) {
- for (i = this->right_choplength - 1; i >= 0; --i) {
- if ((c = this->right_chop_quality[i] + shift) <= 32) {
- fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
- shift,this->right_chop_quality[i]);
- abort();
- } else {
- FPRINTF(fp,"%c",c);
- }
+ if (shift == 0) {
+ FPRINTF(fp,"\t");
+ if (this->right_chop != NULL && hardclip_low == 0) {
+ FPRINTF(fp,"%r",this->right_chop_quality);
}
- }
- for (i = this->fulllength - hardclip_high - 1; i >= hardclip_low; --i) {
- if ((c = this->quality[i] + shift) <= 32) {
- fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
- shift,this->left_chop_quality[i]);
- abort();
- } else {
- FPRINTF(fp,"%c",c);
+ FPRINTF(fp,"%.*r",this->fulllength - hardclip_high - hardclip_low,&(this->quality[hardclip_high]));
+ if (this->left_chop != NULL && hardclip_high == 0) {
+ FPRINTF(fp,"%r",this->left_chop_quality);
}
- }
- if (this->left_chop_quality != NULL && hardclip_low == 0) {
- for (i = this->left_choplength - 1; i >= 0; --i) {
- if ((c = this->left_chop_quality[i] + shift) <= 32) {
- fprintf(stderr,"Warning: With a quality-print-shift of %d, QC score %c becomes non-printable. May need to specify --quality-protocol or --quality-print-shift\n",
- shift,this->left_chop_quality[i]);
+
+ } else {
+ FPRINTF(fp,"\t");
+ if (this->right_chop_quality != NULL && hardclip_low == 0) {
+ for (i = this->right_choplength - 1; i >= 0; --i) {
+ if ((c = this->right_chop_quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
+ }
+
+ for (i = this->fulllength - hardclip_low - 1; i >= hardclip_high; --i) {
+ if ((c = this->quality[i] + shift) <= 32) {
abort();
} else {
FPRINTF(fp,"%c",c);
}
}
+
+ if (this->left_chop_quality != NULL && hardclip_high == 0) {
+ for (i = this->left_choplength - 1; i >= 0; --i) {
+ if ((c = this->left_chop_quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
+ }
}
}
@@ -5018,7 +5012,7 @@ Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low,
FPRINTF(fp,"%.*R",hardclip_low,&(this->contents[this->fulllength - hardclip_low]));
} else {
hardclip_low -= this->right_choplength;
- FPRINTF(fp,"%.*R",this->right_choplength,this->right_chop);
+ FPRINTF(fp,"%R",this->right_chop);
FPRINTF(fp,"%.*R",hardclip_low,&(this->contents[this->fulllength - hardclip_low]));
}
} else {
@@ -5027,7 +5021,7 @@ Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low,
} else {
hardclip_high -= this->left_choplength;
FPRINTF(fp,"%.*R",hardclip_high,&(this->contents[0]));
- FPRINTF(fp,"%.*R",this->left_choplength,this->left_chop);
+ FPRINTF(fp,"%R",this->left_chop);
}
}
@@ -5037,62 +5031,141 @@ Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low,
/* For samprint XI field and print_fusion */
void
-Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
-#ifdef PRINT_INDIVIDUAL_CHARS
+Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
+ int shift) {
int i;
-#endif
+ int c;
if (this->quality == NULL) {
FPRINTF(fp,"*");
return;
} else if (hardclip_low > 0) {
-#ifdef PRINT_INDIVIDUAL_CHARS
- for (i = 0; i < hardclip_low; i++) {
- FPRINTF(fp,"%c",this->quality[i]);
+ if (shift == 0) {
+ if (this->left_choplength == 0) {
+ FPRINTF(fp,"%.*s",hardclip_low,&(this->quality[0]));
+ } else {
+ hardclip_low -= this->left_choplength;
+ FPRINTF(fp,"%s",this->left_chop_quality);
+ FPRINTF(fp,"%.*s",hardclip_low,&(this->quality[0]));
+ }
+
+ } else {
+ hardclip_low -= this->left_choplength;
+ for (i = 0; i < this->left_choplength; i++) {
+ if ((c = this->left_chop_quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
+ for (i = 0; i < hardclip_low; i++) {
+ if ((c = this->quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
}
-#else
- FPRINTF(fp,"%.*s",hardclip_low,&(this->quality[0]));
-#endif
- return;
} else {
-#ifdef PRINT_INDIVIDUAL_CHARS
- for (i = this->fulllength - hardclip_high; i < this->fulllength; i++) {
- FPRINTF(fp,"%c",this->quality[i]);
+ if (shift == 0) {
+ if (this->right_choplength == 0) {
+ FPRINTF(fp,"%.*s",hardclip_high,&(this->quality[this->fulllength - hardclip_high]));
+ } else {
+ hardclip_high -= this->right_choplength;
+ FPRINTF(fp,"%.*s",hardclip_high,&(this->quality[this->fulllength - hardclip_high]));
+ FPRINTF(fp,"%s",this->right_chop_quality);
+ }
+ } else {
+ hardclip_high -= this->right_choplength;
+ for (i = this->fulllength - hardclip_high; i < this->fulllength; i++) {
+ if ((c = this->quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
+ for (i = 0; i < this->right_choplength; i++) {
+ if ((c = this->right_chop_quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
}
-#else
- FPRINTF(fp,"%.*s",hardclip_high,&(this->quality[this->fulllength - hardclip_high]));
-#endif
- return;
}
+
+ return;
}
+
/* For samprint XI field and print_fusion */
void
-Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
+Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
+ int shift) {
+ int i;
+ int c;
if (this->quality == NULL) {
FPRINTF(fp,"*");
return;
} else if (hardclip_low > 0) {
-#ifdef PRINT_INDIVIDUAL_CHARS
- for (i = this->fulllength - 1; i >= this->fulllength - hardclip_low; --i) {
- FPRINTF(fp,"%c",this->quality[i]);
+ if (shift == 0) {
+ if (this->right_choplength == 0) {
+ FPRINTF(fp,"%.*r",hardclip_low,&(this->quality[this->fulllength - hardclip_low]));
+ } else {
+ hardclip_low -= this->right_choplength;
+ FPRINTF(fp,"%r",this->right_chop_quality);
+ FPRINTF(fp,"%.*r",hardclip_low,&(this->quality[this->fulllength - hardclip_low]));
+ }
+
+ } else {
+ hardclip_low -= this->right_choplength;
+ for (i = this->right_choplength - 1; i >= 0; --i) {
+ if ((c = this->right_chop_quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
+ for (i = this->fulllength - 1; i >= this->fulllength - hardclip_low; --i) {
+ if ((c = this->quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
}
-#else
- FPRINTF(fp,"%.*r",hardclip_low,&(this->quality[this->fulllength - hardclip_low]));
-#endif
} else {
-#ifdef PRINT_INDIVIDUAL_CHARS
- for (i = hardclip_high - 1; i >= 0; --i) {
- FPRINTF(fp,"%c",this->quality[i]);
+ if (shift == 0) {
+ if (this->left_choplength == 0) {
+ FPRINTF(fp,"%.*r",hardclip_high,&(this->quality[0]));
+ } else {
+ hardclip_high -= this->left_choplength;
+ FPRINTF(fp,"%.*r",hardclip_high,&(this->quality[0]));
+ FPRINTF(fp,"%r",this->left_chop_quality);
+ }
+
+ } else {
+ hardclip_high -= this->left_choplength;
+ for (i = hardclip_high - 1; i >= 0; --i) {
+ if ((c = this->quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
+ for (i = this->left_choplength - 1; i >= 0; --i) {
+ if ((c = this->left_chop_quality[i] + shift) <= 32) {
+ abort();
+ } else {
+ FPRINTF(fp,"%c",c);
+ }
+ }
}
-#else
- FPRINTF(fp,"%.*r",hardclip_high,&(this->quality[0]));
-#endif
}
return;
=====================================
src/shortread.h
=====================================
@@ -201,9 +201,11 @@ Shortread_print_chopped_end (Filestring_T fp, T this, int hardclip_low, int hard
extern void
Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
extern void
-Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
+Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
+ int shift);
extern void
-Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
+Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
+ int shift);
extern void
Shortread_print_barcode (Filestring_T fp, T this);
=====================================
src/spliceends.c
=====================================
@@ -380,7 +380,10 @@ novel_spliceends_trim5_sense (double *max_prob, bool *partnerp, T this,
mismatchi--;
}
- if (plusp) {
+ if (low_univdiagonal > high_univdiagonal) {
+ /* Skip */
+
+ } else if (plusp) {
debug1(printf("novel_spliceends_trim5_sense, plus\n"));
/* Region from start_genomicpos to middle_genomicpos: require a partner */
@@ -972,7 +975,10 @@ novel_spliceends_trim5_anti (double *max_prob, bool *partnerp, T this,
mismatchi--;
}
- if (plusp) {
+ if (low_univdiagonal > high_univdiagonal) {
+ /* Skip */
+
+ } else if (plusp) {
debug1(printf("novel_spliceends_trim5_anti, plus\n"));
/* Region from start_genomicpos to middle_genomicpos: require a partner */
@@ -1802,7 +1808,10 @@ novel_spliceends_trim3_sense (double *max_prob, bool *partnerp, T this,
mismatchi--;
}
- if (plusp) {
+ if (low_univdiagonal > high_univdiagonal) {
+ /* Skip */
+
+ } else if (plusp) {
debug1(printf("novel_spliceends_trim3_sense, plus\n"));
/* Region from start_genomicpos to middle_genomicpos: require a partner */
@@ -2401,7 +2410,10 @@ novel_spliceends_trim3_anti (double *max_prob, bool *partnerp, T this,
mismatchi--;
}
- if (plusp) {
+ if (low_univdiagonal > high_univdiagonal) {
+ /* Skip */
+
+ } else if (plusp) {
debug1(printf("novel_spliceends_trim3_anti, plus\n"));
/* Region from start_genomicpos to middle_genomicpos: require a partner */
=====================================
src/stage3.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 225447 2022-12-12 21:53:32Z twu $";
+static char rcsid[] = "$Id$";
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
@@ -9346,7 +9346,8 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
Genome_T genome, Genome_T genomealt, Pairpool_T pairpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
- int maxpeelback, double defect_rate, bool finalp, bool simplep) {
+ int maxpeelback, double defect_rate, bool finalp, bool simplep,
+ bool allow_microexon_p) {
List_T gappairs, peeled_pairs = NULL, peeled_path = NULL, p;
Pair_T pair;
int queryjump, genomejump;
@@ -9641,6 +9642,11 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
*path = Pairpool_transfer(*path,peeled_path);
introntype = NONINTRON;
+ } else if (allow_microexon_p == false) {
+ *filledp = true;
+ debug(printf("Microexons disallowed, so using the given solution\n"));
+ pairs = Pairpool_transfer(pairs,gappairs);
+
} else {
*filledp = true;
@@ -9679,7 +9685,7 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
/*new_leftprob*/left_prob,/*new_rightprob*/right_prob,
genome,genomealt,chrnum,chroffset,chrhigh,
cdna_direction,watsonp) == false) {
- debug(printf("Shift is worse, so going back to original\n"));
+ debug(printf("Shift is worse, so going back to original, which introduces a dual break\n"));
pairs = Pairpool_transfer(pairs,peeled_pairs);
*path = Pairpool_transfer(*path,peeled_path);
@@ -11504,7 +11510,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
queryseq_ptr,queryuc_ptr,querylength,
cdna_direction,watsonp,genestrand,jump_late_p,
genome,genomealt,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,
- maxpeelback,defect_rate,/*finalp*/false,simplep);
+ maxpeelback,defect_rate,/*finalp*/false,simplep,/*allow_microexon_p*/true);
} else {
debug(printf(" Solving as a dual break\n"));
@@ -12147,7 +12153,7 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
queryseq_ptr,queryuc_ptr,querylength,
cdna_direction,watsonp,genestrand,jump_late_p,
genome,genomealt,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,
- maxpeelback,defect_rate,finalp,simplep);
+ maxpeelback,defect_rate,finalp,simplep,/*allow_microexon_p*/true);
/* Previously had forcep == true, because previously thought that adding large gap is not a good solution */
if (filledp == true) {
@@ -18219,6 +18225,7 @@ Stage3_merge_local (T old_left, T old_right,
genome,genomealt,pairpool,dynprogL,dynprogM,dynprogR,maxpeelback);
} else {
+ /* Do not look for microexons, since they can introduce a dual break that is not resolved by this procedure */
debug10(printf("traverse_genome_gap with cdna_direction %d...",cdna_direction));
new_right->pairs = traverse_genome_gap(&filledp,&shiftp,&dynprogindex_minor,&dynprogindex_major,
new_right->pairs,&path,leftpair,rightpair,
@@ -18227,7 +18234,8 @@ Stage3_merge_local (T old_left, T old_right,
watsonp,new_right->genestrand,
/*jump_late_p*/watsonp ? false : true,genome,genomealt,pairpool,
dynprogL,dynprogM,dynprogR,/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
- maxpeelback,/*defect_rate*/0,/*finalp*/true,/*simplep*/false);
+ maxpeelback,/*defect_rate*/0,/*finalp*/true,/*simplep*/false,
+ /*allow_microexon_p*/false);
debug10(printf("done"));
if (filledp == false) {
View it on GitLab: https://salsa.debian.org/med-team/gmap/-/commit/776a1a5b7c7ef337675249b9938f4e0a1455d66b
--
View it on GitLab: https://salsa.debian.org/med-team/gmap/-/commit/776a1a5b7c7ef337675249b9938f4e0a1455d66b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20231129/8db4f51c/attachment-0001.htm>
More information about the debian-med-commit
mailing list