[med-svn] [gmap] 01/12: Imported Upstream version 2015-12-31.v4
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Fri Jan 29 15:33:06 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 235299e8f7de1f982554a899ddc30b1cbad92f1e
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Fri Jan 15 09:43:21 2016 +0100
Imported Upstream version 2015-12-31.v4
---
ChangeLog | 46 +++++++++++++
VERSION | 2 +-
configure | 24 +++----
src/dynprog_genome.c | 87 ++++++++++++++++---------
src/dynprog_single.c | 3 +-
src/gsnap.c | 4 +-
src/output.c | 8 +--
src/samprint.c | 180 ++++++++++++++++++++++++++++++++-------------------
src/samprint.h | 4 +-
src/sarray-read.c | 38 +++++++----
src/sarray-read.h | 4 +-
src/stage1hr.c | 57 ++++++++++------
src/stage3hr.c | 123 ++++++++++++++++++++++++++---------
src/substring.c | 2 +-
14 files changed, 403 insertions(+), 179 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 5f82c15..88bfd7a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,49 @@
+2016-01-12 twu
+
+ * stage3hr.c, substring.c: Reverting back to remove salvage procedure for
+ terminal alignments
+
+ * substring.c: Computing trim_left and trim_right, even when trim_left_p and
+ trim_right_p are false
+
+ * stage3hr.c: Fixed coordinate calculations in salvage procedure for
+ terminal alignments with too many mismatches
+
+2016-01-08 twu
+
+ * gsnap.c, sarray-read.c, sarray-read.h: Not allowing ambiguous splicing in
+ a circular chromosome
+
+ * dynprog_single.c: Added assertion for glength > 0
+
+2016-01-07 twu
+
+ * dynprog_genome.c: Handling the case where dynamic programming does not
+ find an intron solution
+
+ * VERSION: Updated version number
+
+ * stage1hr.c: Applied revision 181902 from trunk to use MAX_ANCHORS and keep
+ track of both all_segments and anchor_segments
+
+ * stage3hr.c: Applied revision 181882 from trunk to turn off comparison of
+ score_eventrim with cutoff_level in computing optimal_scores, and to
+ recompute pos5 or pos3 in Stage3end_new_terminal if number of mismatches
+ exceeds the number allowed.
+
+ * sarray-read.c, stage3hr.c: Readjusting cutoff_levels in
+ Stage3_pair_up_concordant. Handling Junction_gc within
+ Stage3end_new_substrings.
+
+ * samprint.c, samprint.h: Made fixes to --add-paired-nomappers option
+
+ * output.c: Using new interface to SAM_print_nomapping
+
+2015-12-09 twu
+
+ * samprint.c: Fixed problem with --add-paired-nomappers option for unpaired
+ multiple alignments
+
2015-11-20 twu
* VERSION: Updated version number
diff --git a/VERSION b/VERSION
index b8b724d..451087c 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2015-11-20
\ No newline at end of file
+2015-12-31
\ No newline at end of file
diff --git a/configure b/configure
index 53d74b8..a7ddd4a 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2015-11-20.
+# Generated by GNU Autoconf 2.63 for gmap 2015-12-31.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2015-11-20'
-PACKAGE_STRING='gmap 2015-11-20'
+PACKAGE_VERSION='2015-12-31'
+PACKAGE_STRING='gmap 2015-12-31'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -1513,7 +1513,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2015-11-20 to adapt to many kinds of systems.
+\`configure' configures gmap 2015-12-31 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1584,7 +1584,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2015-11-20:";;
+ short | recursive ) echo "Configuration of gmap 2015-12-31:";;
esac
cat <<\_ACEOF
@@ -1721,7 +1721,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2015-11-20
+gmap configure 2015-12-31
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1735,7 +1735,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2015-11-20, which was
+It was created by gmap $as_me 2015-12-31, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2105,8 +2105,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2015-11-20" >&5
-$as_echo "2015-11-20" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2015-12-31" >&5
+$as_echo "2015-12-31" >&6; }
### Read defaults
@@ -4172,7 +4172,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2015-11-20'
+ VERSION='2015-12-31'
cat >>confdefs.h <<_ACEOF
@@ -26591,7 +26591,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2015-11-20, which was
+This file was extended by gmap $as_me 2015-12-31, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -26654,7 +26654,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2015-11-20
+gmap config.status 2015-12-31
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/src/dynprog_genome.c b/src/dynprog_genome.c
index d9ff04f..8e41ef3 100644
--- a/src/dynprog_genome.c
+++ b/src/dynprog_genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_genome.c 174482 2015-09-22 00:58:39Z twu $";
+static char rcsid[] = "$Id: dynprog_genome.c 181911 2016-01-07 21:25:51Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3494,7 +3494,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
char *rev_rsequence, *rev_rsequenceuc;
Mismatchtype_T mismatchtype;
int canonical_reward;
- int rev_roffset, bestrL, bestrR, bestcL, bestcR;
+ int rev_roffset, bestrL = -1, bestrR, bestcL, bestcR;
int lbandL, ubandL, lbandR, ubandR;
int open, extend;
#if defined(HAVE_SSE4_1) || defined(HAVE_SSE2)
@@ -3743,16 +3743,25 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#endif
mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
- if ((*finalscore = bridge_intron_gap_8_ud(&bestrL,&bestrR,&bestcL,&bestcR,
- &(*introntype),&(*left_prob),&(*right_prob),
- matrix8L_upper,matrix8L_lower,matrix8R_upper,matrix8R_lower,
- directions8L_upper_nogap,directions8L_lower_nogap,
- directions8R_upper_nogap,directions8R_lower_nogap,
- gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
- goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
- cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,defect_rate,
- canonical_reward,goffsetL,rev_goffsetR,
- chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
+ if (bestrL < 0) {
+ debug3(printf("No solution found\n"));
+ FREEA(rev_gsequenceR_alt);
+ FREEA(rev_gsequenceR);
+ FREEA(gsequenceL_alt);
+ FREEA(gsequenceL);
+
+ return (List_T) NULL;
+
+ } else if ((*finalscore = bridge_intron_gap_8_ud(&bestrL,&bestrR,&bestcL,&bestcR,
+ &(*introntype),&(*left_prob),&(*right_prob),
+ matrix8L_upper,matrix8L_lower,matrix8R_upper,matrix8R_lower,
+ directions8L_upper_nogap,directions8L_lower_nogap,
+ directions8R_upper_nogap,directions8R_lower_nogap,
+ gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
+ goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,defect_rate,
+ canonical_reward,goffsetL,rev_goffsetR,
+ chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
FREEA(rev_gsequenceR_alt);
FREEA(rev_gsequenceR);
FREEA(gsequenceL_alt);
@@ -3877,16 +3886,25 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
#endif
mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
- if ((*finalscore = bridge_intron_gap_16_ud(&bestrL,&bestrR,&bestcL,&bestcR,
- &(*introntype),&(*left_prob),&(*right_prob),
- matrix16L_upper,matrix16L_lower,matrix16R_upper,matrix16R_lower,
- directions16L_upper_nogap,directions16L_lower_nogap,
- directions16R_upper_nogap,directions16R_lower_nogap,
- gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
- goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
- cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,defect_rate,
- canonical_reward,goffsetL,rev_goffsetR,
- chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
+ if (bestrL < 0) {
+ debug3(printf("No solution found\n"));
+ FREEA(rev_gsequenceR_alt);
+ FREEA(rev_gsequenceR);
+ FREEA(gsequenceL_alt);
+ FREEA(gsequenceL);
+
+ return (List_T) NULL;
+
+ } else if ((*finalscore = bridge_intron_gap_16_ud(&bestrL,&bestrR,&bestcL,&bestcR,
+ &(*introntype),&(*left_prob),&(*right_prob),
+ matrix16L_upper,matrix16L_lower,matrix16R_upper,matrix16R_lower,
+ directions16L_upper_nogap,directions16L_lower_nogap,
+ directions16R_upper_nogap,directions16R_lower_nogap,
+ gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
+ goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,defect_rate,
+ canonical_reward,goffsetL,rev_goffsetR,
+ chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
FREEA(rev_gsequenceR_alt);
FREEA(rev_gsequenceR);
@@ -3993,14 +4011,23 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
mismatchtype,open,extend,lbandL,ubandR,
/*for revp true*/!jump_late_p,/*revp*/true,/*saturation*/NEG_INFINITY_INT);
- if ((*finalscore = bridge_intron_gap(&bestrL,&bestrR,&bestcL,&bestcR,
- &(*introntype),&(*left_prob),&(*right_prob),
- matrixL,matrixR,directionsL_nogap,directionsR_nogap,
- gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
- goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
- cdna_direction,watsonp,extraband_paired,defect_rate,
- canonical_reward,goffsetL,rev_goffsetR,
- chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
+ if (bestrL < 0) {
+ debug3(printf("No solution found\n"));
+ FREEA(gsequenceL_alt);
+ FREEA(rev_gsequenceR_alt);
+ FREEA(gsequenceL);
+ FREEA(rev_gsequenceR);
+
+ return (List_T) NULL;
+
+ } else if ((*finalscore = bridge_intron_gap(&bestrL,&bestrR,&bestcL,&bestcR,
+ &(*introntype),&(*left_prob),&(*right_prob),
+ matrixL,matrixR,directionsL_nogap,directionsR_nogap,
+ gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
+ goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
+ cdna_direction,watsonp,extraband_paired,defect_rate,
+ canonical_reward,goffsetL,rev_goffsetR,
+ chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
FREEA(gsequenceL_alt);
FREEA(rev_gsequenceR_alt);
diff --git a/src/dynprog_single.c b/src/dynprog_single.c
index afae754..3022e28 100644
--- a/src/dynprog_single.c
+++ b/src/dynprog_single.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_single.c 146623 2014-09-02 21:31:32Z twu $";
+static char rcsid[] = "$Id: dynprog_single.c 181922 2016-01-08 00:43:31Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -504,6 +504,7 @@ Dynprog_single_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmis
/* Can happen in bad genomic regions. May want to give up, though, if rlength and glength differ greatly. */
assert(glength < 1000);
#endif
+ assert(glength > 0);
if (rlength > dynprog->max_rlength || glength > dynprog->max_glength) {
debug(printf("rlength %d or glength %d is too long. Returning NULL\n",rlength,glength));
diff --git a/src/gsnap.c b/src/gsnap.c
index e565155..bf91e19 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 179364 2015-11-20 22:14:22Z twu $";
+static char rcsid[] = "$Id: gsnap.c 181923 2016-01-08 00:43:56Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3176,7 +3176,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
Genome_setup(genomecomp,genomecomp_alt,mode,circular_typeint);
#ifndef LARGE_GENOMES
if (sarray_fwd != NULL && sarray_rev != NULL) {
- Sarray_setup(sarray_fwd,sarray_rev,genomecomp,mode,chromosome_iit,circular_typeint,
+ Sarray_setup(sarray_fwd,sarray_rev,genomecomp,mode,chromosome_iit,circular_typeint,circularp,
shortsplicedist,localsplicing_penalty,
max_deletionlength,max_end_deletions,max_middle_insertions,max_end_insertions,
splicesites,splicetypes,splicedists,nsplicesites);
diff --git a/src/output.c b/src/output.c
index 46fd3b4..3b59563 100644
--- a/src/output.c
+++ b/src/output.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: output.c 166468 2015-05-28 02:39:14Z twu $";
+static char rcsid[] = "$Id: output.c 181903 2016-01-07 20:30:55Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -186,7 +186,7 @@ filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_fail
SAM_print_nomapping(fp,ABBREV_NOMAPPING_1,
queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
/*acc2*/NULL,chromosome_iit,resulttype,
- /*first_read_p*/true,/*npaths*/0,/*npaths_mate*/0,/*mate_chrpos*/0U,
+ /*first_read_p*/true,/*pathnum*/0,/*npaths*/0,/*npaths_mate*/0,/*mate_chrpos*/0U,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
if (failedinput_root != NULL) {
Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
@@ -237,7 +237,7 @@ filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_fail
SAM_print_nomapping(fp,ABBREV_UNPAIRED_TRANSLOC,
queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
/*acc2*/NULL,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths,/*npaths_mate*/0,/*mate_chrpos*/0U,
+ /*first_read_p*/true,/*pathnum*/1,npaths,/*npaths_mate*/0,/*mate_chrpos*/0U,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
if (failedinput_root != NULL) {
Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
@@ -279,7 +279,7 @@ filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_fail
SAM_print_nomapping(fp,ABBREV_UNPAIRED_MULT_XS,
queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
/*acc2*/NULL,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths,/*npaths_mate*/0,/*mate_chrpos*/0U,
+ /*first_read_p*/true,/*pathnum*/1,npaths,/*npaths_mate*/0,/*mate_chrpos*/0U,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
if (failedinput_root != NULL) {
Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
diff --git a/src/samprint.c b/src/samprint.c
index 27d7c10..2fe012c 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 179366 2015-11-20 22:16:36Z twu $";
+static char rcsid[] = "$Id: samprint.c 181904 2016-01-07 20:31:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -355,7 +355,7 @@ make_complement_buffered (char *complement, char *sequence, unsigned int length)
void
SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3end_T mate, char *acc1, char *acc2,
Univ_IIT_T chromosome_iit, Resulttype_T resulttype, bool first_read_p,
- int npaths, int npaths_mate, Chrpos_T mate_chrpos, int quality_shift,
+ int pathnum, int npaths, int npaths_mate, Chrpos_T mate_chrpos, int quality_shift,
char *sam_read_group_id, bool invertp, bool invert_mate_p) {
unsigned int flag;
@@ -409,6 +409,9 @@ SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3
/* 12. TAGS: NH */
if (npaths > 0) {
FPRINTF(fp,"\tNH:i:%d",npaths);
+ if (add_paired_nomappers_p == true) {
+ FPRINTF(fp,"\tHI:i:%d",pathnum);
+ }
}
/* 12. TAGS: XB */
@@ -3501,7 +3504,7 @@ SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
/* Also, can use this test here because --quiet-if-excessive cases go directly to SAM_print_nomapping */
if (npaths == 0) {
SAM_print_nomapping(fp,abbrev,queryseq,mate,acc1,acc2,chromosome_iit,resulttype,first_read_p,
- /*npaths*/0,npaths_mate,mate_chrpos,quality_shift,
+ /*pathnum*/0,/*npaths*/0,npaths_mate,mate_chrpos,quality_shift,
sam_read_group_id,invertp,invert_mate_p);
if (fp_failedinput != NULL) {
@@ -3668,7 +3671,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
Stage3pair_T *stage3pairarray, stage3pair;
Stage3end_T *stage3array1, *stage3array2, stage3, mate, hit5, hit3;
Chrpos_T chrpos, chrpos5, chrpos3;
- int npaths, npaths1, npaths2, pathnum;
+ int npaths, npaths_max, npaths1, npaths2, pathnum;
int first_absmq, second_absmq, first_absmq1, second_absmq1, first_absmq2, second_absmq2;
int hardclip5_low = 0, hardclip5_high = 0, hardclip3_low = 0, hardclip3_high = 0, clipdir;
char *acc1, *acc2;
@@ -3693,12 +3696,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
Filestring_set_split_output(fp,OUTPUT_NM);
SAM_print_nomapping(fp,ABBREV_NOMAPPING_1,queryseq1,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,/*npaths*/0,/*npaths_mate*/0,
+ /*first_read_p*/true,/*pathnum*/0,/*npaths*/0,/*npaths_mate*/0,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_first_p,invert_second_p);
SAM_print_nomapping(fp,ABBREV_NOMAPPING_2,queryseq2,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,/*npaths*/0,/*npaths_mate*/0,
+ /*first_read_p*/false,/*pathnum*/0,/*npaths*/0,/*npaths_mate*/0,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_second_p,invert_first_p);
@@ -3820,13 +3823,13 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
SAM_print_nomapping(fp,ABBREV_CONCORDANT_TRANSLOC,
queryseq1,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths,/*npaths_mate*/npaths,
+ /*first_read_p*/true,/*pathnum*/1,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_first_p,invert_second_p);
SAM_print_nomapping(fp,ABBREV_CONCORDANT_TRANSLOC,
queryseq2,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths,/*npaths_mate*/npaths,
+ /*first_read_p*/false,/*pathnum*/1,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_second_p,invert_first_p);
@@ -3892,13 +3895,13 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
SAM_print_nomapping(fp,ABBREV_CONCORDANT_MULT_XS,
queryseq1,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths,/*npaths_mate*/npaths,
+ /*first_read_p*/true,/*pathnum*/1,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_first_p,invert_second_p);
SAM_print_nomapping(fp,ABBREV_CONCORDANT_MULT_XS,
queryseq2,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths,/*npaths_mate*/npaths,
+ /*first_read_p*/false,/*pathnum*/1,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_second_p,invert_first_p);
@@ -4066,13 +4069,13 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
SAM_print_nomapping(fp,ABBREV_PAIRED_MULT_XS,
queryseq1,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths,/*npaths_mate*/npaths,
+ /*first_read_p*/true,/*pathnum*/1,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_first_p,invert_second_p);
SAM_print_nomapping(fp,ABBREV_PAIRED_MULT_XS,
queryseq2,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths,/*npaths_mate*/npaths,
+ /*first_read_p*/false,/*pathnum*/1,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_second_p,invert_first_p);
@@ -4202,6 +4205,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
if (add_paired_nomappers_p == true) {
/* Artificially pair up results */
+ npaths_max = (npaths1 > npaths2) ? npaths1 : npaths2;
for (pathnum = 1; pathnum <= npaths1 && pathnum <= npaths2 && pathnum <= maxpaths_report; pathnum++) {
/* hardclip5_low = hardclip5_high = 0; */
chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,/*stage3*/stage3array1[pathnum-1],
@@ -4212,23 +4216,23 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
Shortread_fulllength(queryseq2),/*first_read_p*/false);
stage3 = stage3array1[pathnum-1];
- SAM_print(fp,fp_failedinput_1,abbrev,stage3,/*mate*/stage3array2[pathnum-1],acc1,acc2,pathnum,npaths1,
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,/*mate*/stage3array2[pathnum-1],acc1,acc2,pathnum,npaths_max,
Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
/*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
/*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
stage3 = stage3array2[pathnum-1];
- SAM_print(fp,fp_failedinput_2,abbrev,stage3,/*mate*/stage3array1[pathnum-1],acc1,acc2,pathnum,npaths2,
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,/*mate*/stage3array1[pathnum-1],acc1,acc2,pathnum,npaths_max,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
/*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
/*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
invert_second_p,invert_first_p,merge_samechr_p);
}
@@ -4241,24 +4245,24 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
Shortread_fulllength(queryseq1),/*first_read_p*/true);
chrpos3 = 0;
- SAM_print(fp,fp_failedinput_1,abbrev,stage3,/*mate*/NULL,acc1,acc2,pathnum,npaths1,
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,/*mate*/NULL,acc1,acc2,pathnum,npaths_max,
Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
/*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
/*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
invert_first_p,invert_second_p,merge_samechr_p);
/* matching nomapper for second end */
SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/false,/*npaths*/npaths2,/*npaths_mate*/npaths1,
+ resulttype,/*first_read_p*/false,pathnum,/*npaths*/npaths_max,/*npaths_mate*/npaths_max,
/*mate_chrpos*/chrpos5,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
}
} else if (npaths2 > npaths1) {
- for ( ; pathnum <= npaths1 && pathnum <= maxpaths_report; pathnum++) {
+ for ( ; pathnum <= npaths2 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array2[pathnum-1];
/* hardclip3_low = hardclip3_high = 0; */
chrpos5 = 0;
@@ -4267,17 +4271,17 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
/* matching nomapper for first end */
SAM_print_nomapping(fp,abbrev,queryseq1,/*mate*/stage3,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/true,/*npaths*/npaths1,/*npaths_mate*/npaths2,
+ resulttype,/*first_read_p*/true,pathnum,/*npaths*/npaths_max,/*npaths_mate*/npaths_max,
/*mate_chrpos*/chrpos3,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
- SAM_print(fp,fp_failedinput_2,abbrev,stage3,/*mate*/NULL,acc1,acc2,pathnum,npaths2,
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,/*mate*/NULL,acc1,acc2,pathnum,npaths_max,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
/*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
/*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
invert_second_p,invert_first_p,merge_samechr_p);
}
}
@@ -4313,7 +4317,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
} else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
/* Just printing one end as nomapping */
SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
+ resulttype,/*first_read_p*/true,/*pathnum*/1,npaths1,/*npaths_mate*/npaths2,
/*mate_chrpos*/chrpos3,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
@@ -4367,7 +4371,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
} else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
/* Just printing one end as nomapping */
SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
+ resulttype,/*first_read_p*/false,/*pathnum*/1,npaths2,/*npaths_mate*/npaths1,
/*mate_chrpos*/chrpos5,
quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
@@ -4467,7 +4471,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
/* Handle nomappers with each mapped mate */
} else {
SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
+ /*first_read_p*/true,/*pathnum*/0,npaths1,/*npaths_mate*/npaths2,
/*mate_chrpos*/chrpos3,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
}
@@ -4480,19 +4484,29 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
/*first_read_p*/true);
- SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
- Stage3end_absmq_score(stage3),first_absmq1,/*second_absmq1*/0,
- Stage3end_mapq_score(stage3),chromosome_iit,
- /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
- /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
- invert_first_p,invert_second_p,merge_samechr_p);
if (add_paired_nomappers_p == true) {
/* matching nomapper for second end */
+ npaths_max = npaths1; /* since npaths2 == 0 */
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths_max,
+ Stage3end_absmq_score(stage3),first_absmq1,/*second_absmq1*/0,
+ Stage3end_mapq_score(stage3),chromosome_iit,
+ /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
+ /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
+ invert_first_p,invert_second_p,merge_samechr_p);
SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/false,/*npaths*/npaths2,/*npaths_mate*/npaths1,/*mate_chrpos*/chrpos5,
+ resulttype,/*first_read_p*/false,/*pathnum*/1,/*npaths*/npaths_max,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos5,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ } else {
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
+ Stage3end_absmq_score(stage3),first_absmq1,/*second_absmq1*/0,
+ Stage3end_mapq_score(stage3),chromosome_iit,
+ /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
+ /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
+ invert_first_p,invert_second_p,merge_samechr_p);
}
} else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
@@ -4502,7 +4516,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
/* Handle nomappers with each mapped mate */
} else {
SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
+ /*first_read_p*/true,/*pathnum*/1,npaths1,/*npaths_mate*/npaths2,
/*mate_chrpos*/chrpos3,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
}
@@ -4515,19 +4529,31 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
/*first_read_p*/true);
- SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
- Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
- Stage3end_mapq_score(stage3),chromosome_iit,
- /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
- /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
- /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
- invert_first_p,invert_second_p,merge_samechr_p);
if (add_paired_nomappers_p == true) {
/* matching nomapper for second end */
+ npaths_max = npaths1; /* since npaths2 == 0 */
SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/false,/*npaths*/npaths2,/*npaths_mate*/npaths1,/*mate_chrpos*/chrpos5,
+ resulttype,/*first_read_p*/false,pathnum,
+ /*npaths*/npaths_max,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos5,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,pathnum,npaths_max,
+ Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
+ Stage3end_mapq_score(stage3),chromosome_iit,
+ /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
+ /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
+ invert_first_p,invert_second_p,merge_samechr_p);
+
+ } else {
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
+ Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
+ Stage3end_mapq_score(stage3),chromosome_iit,
+ /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
+ /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/npaths2,quality_shift,sam_read_group_id,
+ invert_first_p,invert_second_p,merge_samechr_p);
}
}
}
@@ -4553,7 +4579,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
/* Handle nomappers with each mapped mate */
} else {
SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
+ /*first_read_p*/false,/*pathnum*/0,npaths2,/*npaths_mate*/npaths1,
/*mate_chrpos*/chrpos5,
quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
}
@@ -4568,18 +4594,30 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
if (add_paired_nomappers_p == true) {
/* matching nomapper for first end */
+ npaths_max = npaths2; /* since npaths1 == 0 */
SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/true,/*npaths*/npaths1,/*npaths_mate*/npaths2,/*mate_chrpos*/chrpos3,
+ resulttype,/*first_read_p*/true,/*pathnum*/1,
+ /*npaths*/npaths_max,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos3,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths_max,
+ Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
+ Stage3end_mapq_score(stage3),chromosome_iit,
+ /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
+ /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
+ invert_second_p,invert_first_p,merge_samechr_p);
+
+ } else {
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
+ Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
+ Stage3end_mapq_score(stage3),chromosome_iit,
+ /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
+ /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
+ invert_second_p,invert_first_p,merge_samechr_p);
}
- SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
- Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
- Stage3end_mapq_score(stage3),chromosome_iit,
- /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
- /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
- invert_second_p,invert_first_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
/* Just printing one end as nomapping */
@@ -4588,7 +4626,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
/* Handle nomappers with each mapped mate */
} else {
SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
+ /*first_read_p*/false,/*pathnum*/1,npaths2,/*npaths_mate*/npaths1,
/*mate_chrpos*/chrpos5,
quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
}
@@ -4603,18 +4641,30 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
if (add_paired_nomappers_p == true) {
/* matching nomapper for first end */
+ npaths_max = npaths2; /* since npaths1 == 0 */
SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/true,/*npaths*/npaths1,/*npaths_mate*/npaths2,/*mate_chrpos*/chrpos3,
+ resulttype,/*first_read_p*/true,pathnum,
+ /*npaths*/npaths_max,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos3,
quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths_max,
+ Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
+ Stage3end_mapq_score(stage3),chromosome_iit,
+ /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
+ /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
+ invert_second_p,invert_first_p,merge_samechr_p);
+
+ } else {
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
+ Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
+ Stage3end_mapq_score(stage3),chromosome_iit,
+ /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
+ /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
+ invert_second_p,invert_first_p,merge_samechr_p);
}
- SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
- Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
- Stage3end_mapq_score(stage3),chromosome_iit,
- /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
- /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
- /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/false,/*npaths_mate*/npaths1,quality_shift,sam_read_group_id,
- invert_second_p,invert_first_p,merge_samechr_p);
}
}
diff --git a/src/samprint.h b/src/samprint.h
index f43b204..9723497 100644
--- a/src/samprint.h
+++ b/src/samprint.h
@@ -1,4 +1,4 @@
-/* $Id: samprint.h 172734 2015-08-27 16:35:15Z twu $ */
+/* $Id: samprint.h 181904 2016-01-07 20:31:34Z twu $ */
#ifndef SAMPRINT_INCLUDED
#define SAMPRINT_INCLUDED
@@ -41,7 +41,7 @@ SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
extern void
SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3end_T mate, char *acc1, char *acc2,
Univ_IIT_T chromosome_iit, Resulttype_T resulttype, bool first_read_p,
- int npaths, int npaths_mate, Chrpos_T mate_chrpos,
+ int pathnum, int npaths, int npaths_mate, Chrpos_T mate_chrpos,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p);
extern void
diff --git a/src/sarray-read.c b/src/sarray-read.c
index e0f9b6e..99f1810 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 172738 2015-08-27 16:37:59Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 181923 2016-01-08 00:43:56Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -340,6 +340,7 @@ Sarray_size (Sarray_T this) {
static Sarray_T sarray_fwd;
static Sarray_T sarray_rev;
static Genome_T genome;
+static bool *circularp;
static char conversion_fwd[128];
static char conversion_rev[128];
@@ -441,7 +442,7 @@ sarray_search_char (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, char desired_ch
void
Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
- Univ_IIT_T chromosome_iit_in, int circular_typeint_in,
+ Univ_IIT_T chromosome_iit_in, int circular_typeint_in, bool *circularp_in,
Chrpos_T shortsplicedist_in, int splicing_penalty_in,
int max_deletionlength, int max_end_deletions_in,
int max_middle_insertions, int max_end_insertions,
@@ -452,6 +453,7 @@ Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
sarray_fwd = sarray_fwd_in;
sarray_rev = sarray_rev_in;
genome = genome_in;
+ circularp = circularp_in;
for (i = 0; i < 128; i++) {
conversion_fwd[i] = i;
@@ -7389,8 +7391,12 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
/* Step 3: Handle ambiguous ends on right */
right_ambig_sense = (Substring_T) NULL;
- if (right_endpoints_sense == NULL) {
+ if (circularp[chrnum] == true) {
/* Skip */
+
+ } else if (right_endpoints_sense == NULL) {
+ /* Skip */
+
} else if (Intlist_length(right_endpoints_sense) == 1) {
/* Only one splice on right */
splice_pos = Intlist_head(right_endpoints_sense);
@@ -7462,8 +7468,12 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
right_ambig_antisense = (Substring_T) NULL;
- if (right_endpoints_antisense == NULL) {
+ if (circularp[chrnum] == true) {
/* Skip */
+
+ } else if (right_endpoints_antisense == NULL) {
+ /* Skip */
+
} else if (Intlist_length(right_endpoints_antisense) == 1) {
/* Only one splice on right */
splice_pos = Intlist_head(right_endpoints_antisense);
@@ -7559,7 +7569,10 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
/* Step 5: Handle ambiguous ends on left */
left_ambig_sense = (Substring_T) NULL;
- if (left_endpoints_sense == NULL) {
+ if (circularp[chrnum] == true) {
+ /* Skip */
+
+ } else if (left_endpoints_sense == NULL) {
/* Skip, but extend leftward */
if (Intlist_head(sense_endpoints) > 0) {
sense_endpoints = Intlist_pop(sense_endpoints,&querystart);
@@ -7657,7 +7670,10 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
left_ambig_antisense = (Substring_T) NULL;
- if (left_endpoints_antisense == NULL) {
+ if (circularp[chrnum] == true) {
+ /* Skip */
+
+ } else if (left_endpoints_antisense == NULL) {
/* Skip, but extend leftward */
if (Intlist_head(antisense_endpoints) > 0) {
antisense_endpoints = Intlist_pop(antisense_endpoints,&querystart);
@@ -7799,7 +7815,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
Substring_free(&right_ambig_sense);
Substring_free(&left_ambig_sense);
- Junction_gc(&sense_junctions);
+ /* Junction_gc(&sense_junctions); -- Done by Stage3end_new_substrings */
Substring_free(&right_ambig_antisense);
Substring_free(&left_ambig_antisense);
} else {
@@ -7819,7 +7835,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
Substring_free(&right_ambig_sense);
Substring_free(&left_ambig_sense);
- Junction_gc(&sense_junctions);
+ /* Junction_gc(&sense_junctions); -- Done by Stage3end_new_substrings */
} else {
if (Stage3end_substrings_querystart(hit) < 8 &&
Stage3end_substrings_queryend(hit) >= querylength - 8) {
@@ -7834,7 +7850,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
Substring_free(&right_ambig_antisense);
Substring_free(&left_ambig_antisense);
- Junction_gc(&antisense_junctions);
+ /* Junction_gc(&antisense_junctions); -- Done by Stage3end_new_substrings */
} else {
if (Stage3end_substrings_querystart(hit) < 8 &&
Stage3end_substrings_queryend(hit) >= querylength - 8) {
@@ -7856,7 +7872,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
Substring_free(&right_ambig_sense);
Substring_free(&left_ambig_sense);
- Junction_gc(&sense_junctions);
+ /* Junction_gc(&sense_junctions); -- Done by Stage3end_new_substrings */
} else {
if (Stage3end_substrings_querystart(hit) < 8 &&
Stage3end_substrings_queryend(hit) >= querylength - 8) {
@@ -7881,7 +7897,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
Substring_free(&right_ambig_antisense);
Substring_free(&left_ambig_antisense);
- Junction_gc(&antisense_junctions);
+ /* Junction_gc(&antisense_junctions); -- Done by Stage3end_new_substrings */
} else {
if (Stage3end_substrings_querystart(hit) < 8 &&
Stage3end_substrings_queryend(hit) >= querylength - 8) {
diff --git a/src/sarray-read.h b/src/sarray-read.h
index b8ca703..ac10803 100644
--- a/src/sarray-read.h
+++ b/src/sarray-read.h
@@ -1,4 +1,4 @@
-/* $Id: sarray-read.h 166785 2015-06-02 17:58:27Z twu $ */
+/* $Id: sarray-read.h 181923 2016-01-08 00:43:56Z twu $ */
#ifndef SARRAY_READ_INCLUDED
#define SARRAY_READ_INCLUDED
#include "access.h"
@@ -20,7 +20,7 @@ Sarray_size (Sarray_T this);
extern void
Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
- Univ_IIT_T chromosome_iit_in, int circular_typeint_in,
+ Univ_IIT_T chromosome_iit_in, int circular_typeint_in, bool *circularp_in,
Chrpos_T shortsplicedist_in, int splicing_penalty_in,
int max_deletionlength, int max_end_deletions,
int max_middle_insertions, int max_end_insertions,
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 6929c09..39a686f 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 175729 2015-09-30 15:10:51Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 181909 2016-01-07 20:41:49Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -96,6 +96,7 @@ static char rcsid[] = "$Id: stage1hr.c 175729 2015-09-30 15:10:51Z twu $";
#define MAX_NTERMINALS 100
#define MAX_ALLOCATION 200
+#define MAX_ANCHORS 1000
static bool use_sarray_p = true;
static bool use_only_sarray_p = true;
@@ -3697,6 +3698,7 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
#endif
int *npositions, bool *omitted, int querylength, int query_lastpos, Floors_T floors,
int max_mismatches_allowed, bool plusp) {
+ List_T all_segments = NULL;
struct Segment_T *segments = NULL;
Segment_T *array;
int length_threshold;
@@ -3747,6 +3749,7 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
int nsplicesites_local;
debug(printf("*** Starting identify_all_segments on %s ***\n",plusp ? "plus" : "minus"));
+ assert(*anchor_segments == NULL);
if (floors == NULL) {
*nsegments = 0;
@@ -4191,6 +4194,7 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
plusp ? "plus" : "minus",last_diagonal,diagonal));
}
debug14(printf("Saving segment at %u, query %d..%d",last_diagonal,ptr->querypos5,ptr->querypos3));
+ all_segments = List_push(all_segments,(void *) ptr);
if (last_querypos >= first_querypos + /*min_segment_length*/1) {
*anchor_segments = List_push(*anchor_segments,(void *) ptr);
debug14(printf(" ANCHOR"));
@@ -4442,6 +4446,7 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
/* Last segment is not spliceable */
debug14(printf("Saving segment at %u, query %d..%d",last_diagonal,ptr->querypos5,ptr->querypos3));
+ all_segments = List_push(all_segments,(void *) ptr);
if (last_querypos >= first_querypos + /*min_segment_length*/1) {
debug14(printf(" ANCHOR"));
*anchor_segments = List_push(*anchor_segments,(void *) ptr);
@@ -4480,35 +4485,49 @@ identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spli
assert(*nsegments <= total_npositions + nchromosomes);
- *anchor_segments = List_reverse(*anchor_segments);
-#ifdef DEBUG
- printf("%d anchor segments\n",List_length(*anchor_segments));
- for (p = *anchor_segments; p != NULL; p = List_next(p)) {
- segment = (Segment_T) List_head(p);
- printf("%u %d..%d\n",segment->diagonal,segment->querypos5,segment->querypos3);
- }
-#endif
+ debug(printf("%d all segments\n",List_length(all_segments)));
+ debug(printf("%d anchor segments\n",List_length(*anchor_segments)));
+ if (List_length(all_segments) < MAX_ANCHORS) {
+ /* Might as well use all segments */
+ List_free(&(*anchor_segments));
+ *anchor_segments = List_reverse(all_segments);
+
+ } else if (List_length(*anchor_segments) < MAX_ANCHORS) {
+ /* Use only the good anchor segments */
+ List_free(&all_segments);
+ *anchor_segments = List_reverse(*anchor_segments);
+
+ } else {
+ /* Need to limit anchor segments */
+ List_free(&all_segments);
- if (List_length(*anchor_segments) > 10) {
array = (Segment_T *) List_to_array_n(&nanchors,*anchor_segments);
qsort(array,nanchors,sizeof(Segment_T),Segment_length_cmp);
List_free(&(*anchor_segments));
*anchor_segments = (List_T) NULL;
- length_threshold = array[10]->querypos3 - array[10]->querypos5;
- n = 10;
- while (n < nanchors && array[n]->querypos3 - array[n]->querypos5 == length_threshold) {
+ length_threshold = array[MAX_ANCHORS]->querypos3 - array[MAX_ANCHORS]->querypos5;
+ n = MAX_ANCHORS;
+ while (n < nanchors && n < MAX_ANCHORS + /*ties*/100 && array[n]->querypos3 - array[n]->querypos5 == length_threshold) {
n++;
}
- if (n <= 20) {
- qsort(array,n,sizeof(Segment_T),Segment_diagonal_cmp);
- for (i = n-1; i >= 0; i--) {
- *anchor_segments = List_push(*anchor_segments,(void *) array[i]);
- }
+
+ /* Re-sort in diagonal order */
+ qsort(array,n,sizeof(Segment_T),Segment_diagonal_cmp);
+ for (i = n-1; i >= 0; i--) {
+ *anchor_segments = List_push(*anchor_segments,(void *) array[i]);
}
FREE(array);
}
-
+
+
+#ifdef DEBUG19
+ printf("%d total segments\n",*nsegments);
+ for (ptr0 = segments; ptr0 < ptr; ptr0++) {
+ printf("%u %d..%d\n",ptr0->diagonal,ptr0->querypos5,ptr0->querypos3);
+ }
+#endif
+
#ifdef DEBUG
printf("%d selected anchor segments\n",List_length(*anchor_segments));
for (p = *anchor_segments; p != NULL; p = List_next(p)) {
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 15a83a3..fc15b26 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 179365 2015-11-20 22:15:56Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 182190 2016-01-12 22:51:09Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -4761,18 +4761,21 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
if (genomicstart < chroffset && genomicend > chrhigh) {
/* Out of bounds on both sides */
+ Junction_gc(&junctions);
return (T) NULL;
} else if (genomicstart < chroffset) {
outofbounds_start = chroffset - genomicstart;
outofbounds_end = genomicend - chroffset;
debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ Junction_gc(&junctions);
return (T) NULL;
#if 0
/* Could consider this for the lowest substring */
if (outofbounds_start > outofbounds_end) {
/* Consider high part to be out of bounds and keep existing chromosome */
outofbounds_start = 0;
+ Junction_gc(&junctions);
return (T) NULL;
} else {
/* Consider low part to be out of bounds and stay in this chromosome */
@@ -4784,6 +4787,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
outofbounds_start = chrhigh - genomicstart;
outofbounds_end = genomicend - chrhigh;
debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ Junction_gc(&junctions);
return (T) NULL;
#if 0
/* Could consider this for the highest substring */
@@ -4792,6 +4796,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
outofbounds_start = 0;
} else if (++chrnum > nchromosomes) {
debug0(printf("Returning NULL from Stage3end_new_substrings\n"));
+ Junction_gc(&junctions);
return (T) NULL;
} else {
/* Consider low part to be out of bounds and move to next chromosome */
@@ -4837,6 +4842,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
}
}
List_free(&substrings);
+ Junction_gc(&junctions);
return (T) NULL;
} else {
substrings = List_push(substrings,(void *) substring);
@@ -4897,12 +4903,14 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
if (genomicend < chroffset && genomicstart > chrhigh) {
/* Out of bounds on both sides */
+ Junction_gc(&junctions);
return (T) NULL;
} else if (genomicend < chroffset) {
outofbounds_end = chroffset - genomicend;
outofbounds_start = genomicstart - chroffset;
debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ Junction_gc(&junctions);
return (T) NULL;
#if 0
/* Could consider this for the lowest substring */
@@ -4920,6 +4928,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
outofbounds_end = chrhigh - genomicend;
outofbounds_start = genomicstart - chrhigh;
debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ Junction_gc(&junctions);
return (T) NULL;
#if 0
/* Could consider this for the highest substring */
@@ -4928,6 +4937,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
outofbounds_end = 0;
} else if (++chrnum > nchromosomes) {
debug0(printf("Returning NULL from Stage3end_new_substrings\n"));
+ Junction_gc(&junctions);
return (T) NULL;
} else {
/* Consider low part to be out of bounds and move to next chromosome */
@@ -4973,6 +4983,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
}
}
List_free(&substrings);
+ Junction_gc(&junctions);
return (T) NULL;
} else {
substrings = List_push(substrings,(void *) substring);
@@ -5141,6 +5152,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
if (new->alias == +2 || new->alias == -2) {
debug0(printf("Returning NULL from Stage3end_new_substrings because of alias of %d\n",new->alias));
Stage3end_free(&new);
+ /* Junction_gc(&junctions); -- Done by Stage3end_free */
return (T) NULL;
} else {
debug0(printf("Returning %p from Stage3end_new_substrings with found_score %d\n",new,*found_score));
@@ -7472,6 +7484,10 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
bool trim_left_p, trim_right_p;
int outofbounds_start = 0, outofbounds_end = 0;
+ int *mismatch_positions_left, *mismatch_positions_right, nmismatches_left, nmismatches_right;
+ int length_left, length_right, pos5, pos3;
+ int i;
+
debug0(printf("\nStage3end_new_terminal possible: endtypes %s and %s, left %llu, querystart %d, queryend %d, sarrayp %d\n",
Endtype_string(start_endtype),Endtype_string(end_endtype),(unsigned long long) left,querystart,queryend,sarrayp));
@@ -7610,29 +7626,39 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
nmismatches_whole =
Genome_count_mismatches_substring(query_compress,left,/*pos5*/alignstart_trim-left,
/*pos3*/alignend_trim-left,/*plusp*/true,genestrand,first_read_p);
+ debug0(printf("Recomputing nmismatches_whole as %d\n",nmismatches_whole));
+ if (nmismatches_whole <= max_mismatches_allowed) {
+ /* Code in substring.c suggests that nmismatches_bothdiff would be the same value as nmismatches_whole */
+ Substring_set_nmismatches_terminal(substring,nmismatches_whole,/*nmismatches_bothdiff*/nmismatches_whole);
+
+ } else {
+ debug0(printf("returning NULL\n"));
+ Substring_free(&substring);
+ return (T) NULL;
+ }
+
+ if (0) {
+ /* This may be dependent on the trimming algorithm, but is needed to avoid bad terminal alignments */
+ Substring_free(&substring);
+ debug0(printf("returning NULL\n"));
+ return (T) NULL;
+ }
+
} else {
debug0(printf("minus: pos5 = %d, pos3 = %d\n",(int) (alignend_trim-left),(int) (alignstart_trim-left)));
nmismatches_whole =
Genome_count_mismatches_substring(query_compress,left,/*pos5*/alignend_trim-left,
/*pos3*/alignstart_trim-left,/*plusp*/false,genestrand,first_read_p);
- }
- debug0(printf("Recomputing nmismatches_whole as %d\n",nmismatches_whole));
+ debug0(printf("Recomputing nmismatches_whole as %d\n",nmismatches_whole));
+ if (nmismatches_whole <= max_mismatches_allowed) {
+ /* Code in substring.c suggests that nmismatches_bothdiff would be the same value as nmismatches_whole */
+ Substring_set_nmismatches_terminal(substring,nmismatches_whole,/*nmismatches_bothdiff*/nmismatches_whole);
- if (nmismatches_whole > max_mismatches_allowed) {
- /* This may be dependent on the trimming algorithm, but is needed to avoid bad terminal alignments */
- Substring_free(&substring);
- debug0(printf("returning NULL\n"));
- return (T) NULL;
- } else {
- /* Code in substring.c suggests that nmismatches_bothdiff would be the same value as nmismatches_whole */
- Substring_set_nmismatches_terminal(substring,nmismatches_whole,/*nmismatches_bothdiff*/nmismatches_whole);
-#if 0
- if (alignstart_trim >= alignend_trim) {
- Substring_set_endtypes(substring,/*start_endtype*/TERM,/*end_endtype*/END);
} else {
- Substring_set_endtypes(substring,/*start_endtype*/END,/*end_endtype*/TERM);
+ debug0(printf("returning NULL\n"));
+ Substring_free(&substring);
+ return (T) NULL;
}
-#endif
}
new = (T) MALLOC_OUT(sizeof(*new));
@@ -9056,12 +9082,15 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
}
#endif
+#if 0
} else if (hit->score_eventrim > cutoff_level) {
+ /* Turning off, because we turned it off for Stage3pair_optimal_score_aux */
/* For dibasep were previously using hit->ntscore, but gives false positives */
debug4(printf("Eliminating a hit of type %s with score_eventrim %d > cutoff_level %d\n",
hittype_string(hit->hittype),hit->score_eventrim,cutoff_level));
*eliminatedp = true;
Stage3end_free(&hit);
+#endif
} else if (hit->score_eventrim > minscore /* && hit->nmatches_posttrim < max_nmatches_posttrim */) {
debug4(printf("Eliminating a hit with score_eventrim %d and type %s\n",
@@ -13081,7 +13110,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
}
}
- debug5(printf("\nGot insertlength of %d, overreach5p %d, overreach3p %d\n",new->insertlength,overreach5p,overreach3p));
+ debug5(printf("\nGot insertlength of %d\n",new->insertlength));
/* Was new->insertlength <= 0, but this eliminates legitimate overlaps */
/* Was new->insertlength < -pairmax, but this allows overreach */
if (new->insertlength <= 0) {
@@ -15298,13 +15327,16 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),hitpair->score_eventrim));
optimal = List_push(optimal,hitpair);
+#if 0
} else if (score > cutoff_level) {
+ /* Turning off, because it eliminates some shorter, but good, alignments unnecessarily */
debug6(printf("Final: Eliminating a hit pair at %u..%u|%u..%u with score %d > cutoff_level %d (finalp %d)\n",
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
score,cutoff_level,finalp));
*eliminatedp = true;
Stage3pair_free(&hitpair);
+#endif
} else {
debug6(printf("Final: Keeping a hit pair with score_eventrim %d (cutoff_level %d)\n",
@@ -15611,10 +15643,10 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
hits3 = hits3_plus[score3];
nhits5 = nhits5_plus[score5];
nhits3 = nhits3_plus[score3];
- if (nhits5 > 0 && nhits3 > 0) {
- debug5(printf("at score %d, nhits5_plus = %d; at score %d, nhits3_plus = %d\n",
- score5,nhits5,score3,nhits3));
+ debug5(printf("at score %d, nhits5_plus = %d; at score %d, nhits3_plus = %d\n",
+ score5,nhits5,score3,nhits3));
+ if (nhits5 > 0 && nhits3 > 0) {
i = j = 0;
while (*abort_pairing_p == false && i < nhits5) {
hit5 = hits5[i];
@@ -15732,10 +15764,10 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
hits5 = hits5_minus[score5];
nhits3 = nhits3_minus[score3];
nhits5 = nhits5_minus[score5];
- if (nhits3 > 0 && nhits5 > 0) {
- debug5(printf("at score %d, nhits5_minus = %d; at score %d, nhits3_minus = %d\n",
- score5,nhits5,score3,nhits3));
+ debug5(printf("at score %d, nhits5_minus = %d; at score %d, nhits3_minus = %d\n",
+ score5,nhits5,score3,nhits3));
+ if (nhits3 > 0 && nhits5 > 0) {
i = j = 0;
while (*abort_pairing_p == false && i < nhits3) {
hit3 = hits3[i];
@@ -15854,10 +15886,10 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
hits3 = hits3_minus[score3];
nhits5 = nhits5_plus[score5];
nhits3 = nhits3_minus[score3];
- if (nhits5 > 0 && nhits3 > 0) {
- debug5(printf("at score %d, nhits5_plus = %d; at score %d, nhits3_minus = %d\n",
- score5,nhits5,score3,nhits3));
+ debug5(printf("at score %d, nhits5_plus = %d; at score %d, nhits3_minus = %d\n",
+ score5,nhits5,score3,nhits3));
+ if (nhits5 > 0 && nhits3 > 0) {
i = j = 0;
while (*abort_pairing_p == false && i < nhits5) {
hit5 = hits5[i];
@@ -15939,10 +15971,10 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
hits5 = hits5_minus[score5];
nhits3 = nhits3_plus[score3];
nhits5 = nhits5_minus[score5];
- if (nhits3 > 0 && nhits5 > 0) {
- debug5(printf("at score %d, nhits5_minus = %d; at score %d, nhits3_plus = %d\n",
- score5,nhits5,score3,nhits3));
+ debug5(printf("at score %d, nhits5_minus = %d; at score %d, nhits3_plus = %d\n",
+ score5,nhits5,score3,nhits3));
+ if (nhits3 > 0 && nhits5 > 0) {
i = j = 0;
while (*abort_pairing_p == false && i < nhits3) {
hit3 = hits3[i];
@@ -16146,9 +16178,42 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
int cutoff_level_hits5, cutoff_level_hits3, cutoff_level_terminals5 = 0, cutoff_level_terminals3 = 0;
int ignore_found_score;
+ int i;
+ int min_score_5 = -1, min_score_3 = -1;
+ List_T q;
+ T hit;
+
debug5(printf("Starting Stage3_pair_up_concordant with %d concordant, narray5 %d, narray3 %d, terminals5 %p, terminals3 %p, found_score %d\n",
*nconcordant,narray5,narray3,terminals5,terminals3,*found_score));
+ /* Re-adjust cutoff_levels for best hit5 and best hit3 */
+ for (i = 0; i < narray5; i++) {
+ for (q = hitarray5[i]; q != NULL; q = q->rest) {
+ hit = (T) q->first;
+ if (min_score_5 == -1 || hit->nmismatches_bothdiff < min_score_5) {
+ min_score_5 = hit->nmismatches_bothdiff;
+ }
+ }
+ }
+ debug5(printf("min_score_5 = %d\n",min_score_5));
+ if (min_score_5 > cutoff_level_5) {
+ cutoff_level_5 = min_score_5;
+ }
+
+ for (i = 0; i < narray3; i++) {
+ for (q = hitarray3[i]; q != NULL; q = q->rest) {
+ hit = (T) q->first;
+ if (min_score_3 == -1 || hit->nmismatches_bothdiff < min_score_3) {
+ min_score_3 = hit->nmismatches_bothdiff;
+ }
+ }
+ }
+ debug5(printf("min_score_3 = %d\n",min_score_3));
+ if (min_score_3 > cutoff_level_3) {
+ cutoff_level_3 = min_score_3;
+ }
+
+
/* Find sizes for allocating memory */
debug5(printf("Sizes of 5-end pieces by score level:\n"));
hits5_plus = (T **) MALLOCA((cutoff_level_5+1) * sizeof(T *));
diff --git a/src/substring.c b/src/substring.c
index 093fa78..c82905c 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 173896 2015-09-12 00:11:40Z twu $";
+static char rcsid[] = "$Id: substring.c 182190 2016-01-12 22:51:09Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list