[med-svn] [gmap] 01/11: Imported Upstream version 2015-06-10
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Sat Aug 22 06:25:57 UTC 2015
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 9c2f1c4f37bec149ed6b2faf436a7343df3e50fb
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Mon Jun 15 11:30:21 2015 +0200
Imported Upstream version 2015-06-10
---
ChangeLog | 1027 ++-
Makefile.am | 2 +-
Makefile.in | 8 +-
README | 15 +-
TODO | 3 +
VERSION | 2 +-
config.site | 5 +
config/ax_mpi.m4 | 8 +-
configure | 222 +-
configure.ac | 76 +-
mpi/Makefile.am | 111 +
mpi/Makefile.in | 2041 ++++++
src/ChangeLog | 0
src/Makefile.am | 51 +-
src/Makefile.in | 657 +-
src/access.c | 505 +-
src/access.h | 23 +-
src/atoi.h | 1 +
src/atoiindex.c | 45 +-
src/bigendian.h | 4 +-
src/bitpack64-write.h | 5 +-
src/block.h | 3 +-
src/bool.h | 6 +-
src/boyer-moore.h | 3 +-
src/bytecoding.h | 3 +-
src/bzip2.h | 3 +-
src/chimera.c | 32 +-
src/chimera.h | 7 +-
src/chrom.h | 3 +-
src/cmet.h | 1 +
src/cmetindex.c | 40 +-
src/compress.c | 4 +-
src/compress.h | 2 +-
src/config.h.in | 27 +-
src/datadir.h | 3 +-
src/diag.c | 94 +-
src/diag.h | 14 +-
src/diagdef.h | 3 +-
src/diagpool.c | 25 +-
src/diagpool.h | 6 +-
src/doublelist.c | 13 +-
src/doublelist.h | 4 +-
src/dynprog.h | 2 +-
src/except.h | 2 +-
src/filestring.c | 490 ++
src/filestring.h | 62 +
src/fopen.h | 2 +-
src/genome-write.h | 3 +-
src/genome.c | 105 +-
src/genome.h | 10 +-
src/genome128_hr.c | 119 +-
src/genome128_hr.h | 6 +-
src/genome_sites.h | 3 +-
src/genomicpos.c | 43 +-
src/genomicpos.h | 13 +-
src/get-genome.c | 54 +-
src/gmap.c | 1745 +++--
src/gmapindex.c | 161 +-
src/goby.c | 345 -
src/goby.h | 49 -
src/gregion.h | 3 +-
src/gsnap.c | 1959 +++--
src/iit-read-univ.c | 135 +-
src/iit-read-univ.h | 24 +-
src/iit-read.c | 35 +-
src/iit-read.h | 7 +-
src/iit-write-univ.h | 3 +-
src/iit-write.h | 3 +-
src/iitdef.h | 4 +-
src/inbuffer.c | 969 ++-
src/inbuffer.h | 89 +-
src/indel.c | 323 +-
src/indel.h | 23 +-
src/indexdb-write.c | 69 +-
src/indexdb-write.h | 6 +-
src/indexdb.c | 215 +-
src/indexdb.h | 11 +-
src/indexdb_hr.h | 3 +-
src/indexdbdef.h | 12 +-
src/interval.h | 3 +-
src/intlist.c | 44 +-
src/intlist.h | 7 +-
src/intron.h | 3 +-
src/junction.c | 240 +
src/junction.h | 63 +
src/list.c | 12 +-
src/list.h | 4 +-
src/littleendian.h | 4 +-
src/master.c | 510 ++
src/master.h | 65 +
src/match.h | 3 +-
src/matchdef.h | 3 +-
src/matchpool.h | 3 +-
src/maxent_hr.h | 2 +
src/md5.c | 6 +-
src/md5.h | 5 +-
src/mem.c | 135 +-
src/mem.h | 7 +-
src/mpidebug.c | 129 +
src/mpidebug.h | 51 +
src/oligo.h | 3 +-
src/oligoindex_hr.c | 19095 ++++++++++++++++++++++++++++++++++--------------
src/oligoindex_hr.h | 26 +-
src/outbuffer.c | 3200 +++-----
src/outbuffer.h | 81 +-
src/output.c | 972 +++
src/output.h | 57 +
src/pair.c | 1718 +++--
src/pair.h | 90 +-
src/pairdef.h | 3 +-
src/parserange.h | 2 +
src/popcount.c | 2 +-
src/popcount.h | 2 +-
src/reader.h | 3 +-
src/request.c | 9 +-
src/request.h | 3 +-
src/resulthr.c | 19 +-
src/resulthr.h | 15 +-
src/sam_sort.c | 684 +-
src/samflags.h | 116 +-
src/samheader.c | 314 +-
src/samheader.h | 32 +-
src/samprint.c | 6603 +++++------------
src/samprint.h | 65 +-
src/sarray-read.c | 6183 ++++++++++++----
src/sarray-read.h | 21 +-
src/sarray-write.c | 202 +-
src/sarray-write.h | 6 +-
src/segmentpos.c | 35 +-
src/segmentpos.h | 7 +-
src/sequence.c | 92 +-
src/sequence.h | 32 +-
src/shortread.c | 2713 +++++--
src/shortread.h | 151 +-
src/snpindex.c | 19 +-
src/splice.c | 705 +-
src/splice.h | 38 +-
src/stage1.c | 2 +-
src/stage1.h | 3 +-
src/stage1hr.c | 14761 ++++++++++++++++++++-----------------
src/stage1hr.h | 13 +-
src/stage2.c | 156 +-
src/stage2.h | 15 +-
src/stage3.c | 1898 +++--
src/stage3.h | 94 +-
src/stage3hr.c | 9830 ++++++++++++-------------
src/stage3hr.h | 136 +-
src/substring.c | 2075 ++++--
src/substring.h | 176 +-
src/tableuint8.h | 3 +-
src/translation.c | 42 +-
src/translation.h | 7 +-
src/types.h | 13 +-
src/uint8list.c | 37 +-
src/uint8list.h | 6 +-
src/uintlist.c | 37 +-
src/uintlist.h | 6 +-
src/uniqscan.c | 101 +-
src/univdiag.c | 116 +
src/univdiag.h | 31 +
src/univdiagdef.h | 22 +
src/univinterval.h | 3 +-
tests/Makefile.in | 4 +-
util/Makefile.in | 4 +-
util/gmap_build.pl.in | 26 +-
165 files changed, 54350 insertions(+), 32098 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 472d40a..62843c5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,196 +1,1025 @@
-2015-05-01 twu
+2015-06-11 twu
+
+ * VERSION, index.html: Updated version number
+
+ * sarray-write.c: Removing rankfile
+
+ * gmapindex.c: Removing rankfile
+
+2015-06-10 twu
+
+ * gmap_build.pl.in: Changed flag from --no-sarray to --build-sarray
+
+ * atoiindex.c, cmetindex.c: Added flag --build-sarray
+
+2015-06-09 twu
+
+ * indel.c: Added debugging statements
+
+ * stage1hr.c: Bypassing gmap on region if mappingend is less than or equal
+ to mappingstart, which can happen if the region is pushed to the beginning
+ or end of the chromosome
+
+ * stage3hr.c: Assigning loop variable to given junctions before we push
+ left_ambig
+
+2015-06-07 twu
+
+ * stage3.c: Reversed last revision, and put trim_novel_spliceends at
+ beginning of path_trim, since putting it at the end results in an infinite
+ loop
+
+2015-06-06 twu
+
+ * stage3hr.c: Added debugging statement
+
+ * stage3.c: Moved trimming of novel spliceends from beginning of path_trim
+ procedure to end
+
+ * pair.c: Fixed computation of circularpos for minus alignments
+
+2015-06-05 twu
+
+ * samprint.c: Removed unused variables
+
+ * stage3hr.c: In printing translocations, getting separate chrs for the two
+ halves. Turned on TRANSLOC_SPECIAL.
+
+ * samprint.c: In printing halfdonors and halfacceptors, comparing endlengths
+ to trimlengths to determine whether to print H or S in CIGAR string
+
+ * samprint.c: Fixed printing of CIGAR strings for minus alignments
+
+2015-06-04 twu
+
+ * stage1hr.c: Added lowpos and highpos to Segment_T object. Rewrote dynamic
+ programming procedures for converting segments to pairs.
+
+2015-06-03 twu
+
+ * stage1hr.c: In converting segments to GMAP, changed criteria for dynamic
+ programming to be relative to anchor_segment and not to segment[k].
+
+ * sarray-read.c, stage3hr.c: Using new interface to Substring_new_ambig
+
+ * substring.c, substring.h: Setting trim_left and trim_right for ambiguous
+ substrings
+
+2015-06-02 twu
+
+ * stage3hr.c, substring.c, substring.h: Renamed outofbounds variables to
+ outofbounds_start and outofbounds_end. Handling the case where the
+ alignment is out of bounds to the left of the current chromosome.
* VERSION: Updated version number
- * chimera.c: Applying patch 162196 from trunk to add range of chimerapos to
- XT field
+ * archive.html, index.html: Made changes for new version
+
+ * stage1hr.c: Handling the case where floors is NULL, such as for a poly-A
+ read
+
+ * stage3hr.c: Fixed genomic segments for converting substrings to GMAP
+
+ * stage1hr.c: For converting segments to GMAP, fixed criteria for allowing
+ non-monotonic query orders and possible insertions
+
+ * stage3hr.c: Fixed bug in referring to uninitialized substring
+
+ * substring.c, substring.h: Removed left_genomicseg field
+
+ * stage3hr.c: In converting substrings to GMAP, using correct genomic
+ nucleotide now
+
+ * gsnap.c: Made batch level 4 the default
+
+ * stage1hr.c: Reordered search algorithms. Limiting number of anchor
+ segments, and pairing up those instead. Disabling doublesplicing
+ algorithm.
+
+ * sarray-read.h: Removed references to sarray_gmap
+
+ * sarray-read.c: Removed references to sarray_gmap
+
+ * pair.c, pair.h: For GSNAP default output format, no longer printing pair
+ info for single-end reads
+
+ * memory-check.pl: Handling results for non-threaded runs
+
+ * sarray-read.c: Fixed memory leak
+
+ * stage1hr.c: Deferring read_oligos until we need them for spanning set or
+ complete set algorithms
+
+ * stage1hr.c: Fixed call to single hit alignment of GMAP. Made batch level
+ 4 the default for memory.
+
+ * stage1hr.c: Allowing terminal alignments only if no single-end alignments
+ are found, or if no concordant alignments are found.
+
+ * sarray-read.c: Fixed memory leak
+
+ * stage1hr.c: Limiting number of anchor segments. Implementing terminal
+ alignments.
+
+ * stage3hr.c: Using new interface to Substring procedures
+
+ * substring.c, substring.h: Removed unused variables
+
+ * samprint.c: Removed obsolete code for printing specific GSNAP types
+
+ * stage1hr.c: Implemented finding of terminals based on anchor segments
+
+ * stage3hr.c: Fixed accumulation of ilength_high. In comparing GMAP against
+ substrings, iterating through all substrings.
+
+ * stage3hr.c, stage3hr.h, substring.c, substring.h: Fixed issues with
+ substring boundaries, computing genomic_diff, and marking mismatches
+
+ * stage2.c: Removed GMAP-specific code from GSNAP
+
+ * samprint.c: Changed call to get querylengths
+
+ * genome128_hr.c, genome128_hr.h: Removed mismatch_offset
+
+2015-05-31 twu
+
+ * samprint.c: Removed references to Pair_check_cigar. Changed calls to get
+ cdna_direction to those for sensedir.
+
+ * pair.c: Removed printing of state
+
+ * substring.c: Removed references to genomicstart_adj and genomicend_adj in
+ converting substrings to pairs
+
+ * stage3hr.h: Removed interface for Stage3end_indel_pos
+
+ * stage3hr.c: Changed calls to Substring_new for insertion and deletion
+ types to conform to new substrings standards, where each substring has its
+ genomicstart and genomicend adjusted for indels. Removed indel_pos and
+ indel_low fields from Stage3end_T object. Removed code for printing
+ separate GSNAP types.
+
+ * stage3hr.c: Setting trim_left, trim_right, trim_left_splicep, and
+ trim_right_splicep for substring hit type
+
+2015-05-30 twu
+
+ * stage3hr.c: Fixed coordinate error in test_hardclips
+
+ * stage3hr.c: Fixed typo
+
+2015-05-29 twu
+
+ * samprint.c, stage3hr.c: Fixed issues in finding substring_low for minus
+ alignments using hardclip_low
+
+ * stage3hr.c: Fixed computation of ilength for substrings
+
+ * 2015-statgen, Ambiguous-splicing.eps, DP-triangles.eps,
+ Diagonalization.eps, Hierarchical-GMAP.eps, Large-hash-table.eps,
+ Makefile.gsnaptoo.am, Overlapping-alignment.eps, README,
+ SIMD-oligomers.eps, Vertical-format.eps, algorithm.tex, biblio.bib,
+ context.tex, diag.c, diag.h, diagpool.c, diagpool.h, discussion.tex,
+ doublelist.c, doublelist.h, features.tex, genome128_hr.c, gmap.c, gsnap.c,
+ indel.c, indel.h, intlist.c, intlist.h, introduction.tex, junction.c,
+ junction.h, list.c, list.h, oligoindex_hr.c, oligoindex_hr.h, pair.c,
+ pair.h, samprint.c, samprint.h, sarray-read.c, sarray-read.h, sequence.c,
+ splice.c, splice.h, splicing-score.c, src, stage1hr.c, stage1hr.h,
+ stage2.c, stage2.h, stage3.c, stage3.h, stage3hr.c, stage3hr.h,
+ substring.c, substring.h, toplevel.tex, trunk, uint8list.c, uint8list.h,
+ uintlist.c, uintlist.h, uniqscan.c, univdiag.c, univdiag.h, univdiagdef.h,
+ util: Merged revisions 162218 to 166640 from
+ branches/2015-03-28-sarray-gmap, 2015-03-31-new-sarray-,
+ 2015-05-07-sarray-ambig, 2015-05-21-segment-gmap, and
+ 2015-05-22-fast-oligoindex
+
+ * config.site.rescomp.tst, trunk: Updated version number
+
+ * index.html: Made changes for 2014-12-29
+
+ * samprint.c: Moved position of #endif line
+
+2015-05-28 twu
+
+ * substring.c: Fixes to debugging statements
+
+ * stage3.c: Fixes to debugging statements
+
+ * samprint.c, samprint.h: Revisions to SAM_compute_chrpos
+
+ * output.c: Using new interface to SAM_compute_chrpos
+
+2015-05-20 twu
+
+ * gmapindex.c, src: Allowing genomecomp to be a command-line argument.
+ Merged changes from branches/2015-05-15-compressed-sarray to allow for
+ compressed suffix arrays.
+
+ * gmap_build.pl.in, util: Providing genomecomp file as a command-line
+ argument, instead of piping it into gmapindex
+
+ * sarray-write.c, sarray-write.h: Merged changes from
+ branches/2015-05-15-compressed-sarray to allow for compressed suffix
+ arrays, but removed csafile needed for debugging
- * iit-read.c: Applying patch 164702 from trunk to check for possibility of a
- zero-length array in IIT_get_highs_for_low and IIT_get_lows_for_high
+ * sarray-read.c: Turning off code for compressed suffix arrays
- * stage3hr.c: Applying patch 164702 from trunk to fix order of LtoH
- substrings for deletions
+ * indexdb-write.c, indexdb-write.h: Allowing the case where genomelength is
+ less than index1part
+
+ * bitpack64-write.h: Improved comments
+
+ * access.c: Merged changes from branches/2015-05-15-compressed-sarray to
+ assign *fd, even if file is empty
+
+ * sarray-read.c: Merged code for compressed suffix array. Implemented
+ different methods for Elt_fill_positions_filtered, depending on whether
+ the filtering occurs more than once.
+
+ * gmap.c: Using new interface to Pair_setup
+
+2015-05-16 twu
+
+ * output.c: Not computing chrpos for SAMECHR_SPLICE and TRANSLOC_SPLICE
+ hittypes
+
+ * gmap.c, gsnap.c, pair.c, pair.h, uniqscan.c: Fixed issue with printing
+ nsnpdiffs for GMAP alignments
+
+ * stage3hr.c: Turned on TRANSLOC_SPECIAL to remove translocations when
+ non-translocation alignments are found. Using effective_chr for printing
+ purposes. Pushing both substrings for a distant splice. Using querystart
+ and queryend instead of querystart_adj and queryend_adj for computing
+ insertlength.
+
+ * samprint.c: Using Substring_compute_chrpos to compute chrpos based on
+ substrings instead of Stage3end_T object
+
+ * substring.c, substring.h: Implemented Substring_compute_chrpos
+
+2015-05-01 twu
+
+ * iit-read.c: Checking for the possibility in IIT_get_highs_for_low and
+ IIT_get_lows_for_high of a zero-length array.
+
+ * stage3hr.c: Fixed order of LtoH substrings for deletions
+
+ * oligoindex_hr.c: Replaced count_fwdrev_simd with individual
+ count_*mer_fwd|rev_simd procedures
+
+ * substring.c: Revised some debugging statements
+
+ * stage3hr.c: Retaining old information about sarrayp when copying a
+ Stage3_T object
+
+ * stage3.c: Initializing max_nmatches to be 0 in end-trimming procedures
+
+ * Makefile.gsnaptoo.am: Added -lrt to get shm commands
+
+ * algorithm.tex, context.tex, features.tex, introduction.tex: Augmented
+ captions
+
+ * biblio.bib: Added references
+
+2015-04-30 twu
+
+ * discussion.tex: Added material
+
+ * biblio.bib, toplevel.tex: Added references
+
+ * algorithm.tex, features.tex, introduction.tex: Added citations
+
+ * discussion.tex: Added text
+
+ * context.tex: Added description of GSTRUCT
+
+ * context.tex, discussion.tex: Moved HTSeqGenie to context.tex
+
+ * introduction.tex: Added caption
+
+ * features.tex: Revisions
+
+ * Diagonalization.eps, Hierarchical-GMAP.eps, Large-hash-table.eps,
+ Overlapping-alignment.eps, SIMD-oligomers.eps: Revised figures
+
+ * algorithm.tex: Expanded caption
+
+ * context.tex: Revisions
+
+2015-04-29 twu
+
+ * algorithm.tex: Revisions
+
+2015-04-29 matthejb
+
+ * discussion.tex: + adding content to discussion
+
+2015-04-29 twu
+
+ * algorithm.tex: Revisions to diagonalization
+
+ * toplevel.tex: Changed symbols for logical operations
+
+ * algorithm.tex: Revisions
+
+ * algorithm.tex: Revisions to linear genome
+
+2015-04-28 matthejb
+
+ * discussion.tex: + initial additions to discussion by MB
+
+2015-04-28 twu
+
+ * algorithm.tex: Moved material on large genomes from features.tex to here
+
+ * introduction.tex: Revisions
+
+ * features.tex: Revisions
+
+ * algorithm.tex: Moved section on ranking alignments and eliminating
+ duplicates to features.tex
+
+ * discussion.tex: Added notes
+
+ * algorithm.tex: Changed table
+
+ * features.tex: Revisions
+
+ * introduction.tex: Revisions
+
+2015-04-27 twu
+
+ * introduction.tex: Revisions
+
+2015-04-27 michafla
+
+ * biblio.bib, context.tex: first draft of gmapR writeup
+
+2015-04-25 twu
+
+ * Hierarchical-GMAP.eps, algorithm.tex, features.tex, introduction.tex,
+ toplevel.tex: Revisions
+
+2015-04-24 twu
+
+ * algorithm.tex, features.tex, introduction.tex: Revisions
+
+ * Ambiguous-splicing.eps, DP-triangles.eps, Diagonalization.eps,
+ Hierarchical-GMAP.eps, Large-hash-table.eps, Overlapping-alignment.eps,
+ SIMD-oligomers.eps, Vertical-format.eps: Added figures
+
+2015-04-23 twu
+
+ * 2015-statgen, algorithm.tex, context.tex, discussion.tex, features.tex,
+ introduction.tex, papers, toplevel.tex: Added directory for editing papers
+
+2015-04-07 twu
+
+ * splice.c: Fixed probability calculation for an ambiguous splice
+
+2015-03-27 twu
+
+ * stage3hr.c: Allowing insertlength to be negative, up to -pairmax, to allow
+ for overlaps. For debugging messages involving insert length, using
+ chromosomal coordinates.
+
+ * stage1hr.c: Added address of GMAP alignment to debugging messages
+
+ * chimera.c: Added information about querypos and homology to XT field for
+ GMAP
+
+ * samprint.c: Removed old version of adjust_hardclips
+
+2015-03-26 twu
+
+ * filestring.c: Turned off debugging output to stdout
+
+ * outbuffer.c: Allow possibility in MPI for output to stdout
+
+ * mpidebug.h: Added tag for writing to stdout
+
+ * mpidebug.c: Handling debugging output for MPI_BOOL_T as an unsigned char
+
+ * master.c, master.h: Allow possibility in MPI for output to stdout
+
+ * gsnap.c: Allow possibility in MPI for output to stdout
+
+ * filestring.c: Allow possibility in MPI for output to stdout
+
+ * gsnap.c: Allowing MPI with only a single thread per rank, by calling
+ Master_parser as a detached thread
+
+ * sarray-read.c: Allowing memory mapping for indexij_access
2015-03-25 twu
+ * gmap.c, gsnap.c: Added USE_MPI checks around final MPI_Barrier
+
* VERSION: Updated version number
- * VERSION, public-2014-12-17, src, stage3hr.c: Merged revisions 161720
- through 161861 from trunk to fix --clip-overlap and --merge-overlap
+ * access.c, access.h, atoiindex.c, cmetindex.c, configure.ac, genome.c,
+ genome.h, get-genome.c, gmap.c, gmapindex.c, gsnap.c, iit-read-univ.c,
+ iit-read.c, index.html, indexdb-write.c, indexdb.c, indexdb.h,
+ indexdbdef.h, outbuffer.c, sarray-read.c, sarray-read.h, sarray-write.c,
+ snpindex.c, src, trunk, uniqscan.c: Merged revisions 161768 through 161939
+ from branches/2015-03-23-shmem to implement shared memory
+
+2015-03-24 twu
+
+ * stage3hr.c: In test_hardclips, checking if low and high coordinates are
+ equal
+
+ * stage3hr.c: Fixed comparison of chrpos in adjust_hardclips_right and
+ adjust_hardclips_left
+
+ * stage3hr.c: In adjust_hardclips, advancing both low_querypos and
+ high_querypos on either failure, to prevent infinite loop
2015-03-23 twu
- * src, stage3hr.c: Merged revision 161669 from trunk to do final
- test_hardclip
+ * stage3hr.c: In adjust_hardclips, advancing either low_querypos or
+ high_querypos if needed
+
+ * stage3hr.c: Doing a final test_hardclip when shift right and shift left
+ are not possible
- * src, substring.c: Merged revision 161663 from trunk to fix alias_circular
- and unalias_circular for genomicstart_adj
+ * substring.c: In alias_circular and unalias_circular, updating
+ genomicstart_adj and genomicend_adj
2015-03-22 twu
- * src, stage3hr.c: Merged revision 161659 from trunk to change endpoint test
- in Stage3end_substring_low
+ * stage3hr.c: Changed endpoint test in Stage3end_substring_low
- * src, stage3hr.c, substring.c, substring.h: Merged revisions 161649 through
- 161651 from trunk to add genomicstart_adj and genomicend_adj
+ * substring.c: Removed debugging string
+
+ * substring.c, substring.h: Added fields genomicstart_adj and genomicend_adj
+ for substring2 of insertions and deletions to handle computations with
+ querypos to obtain a genomic position
+
+ * stage3hr.c: Using genomicstart_adj and genomicend_adj in insertions and
+ deletions to handle computations with querypos to obtain a genomic
+ position
2015-03-21 twu
- * src, stage3hr.c, substring.c, substring.h: Merged revisions 161638 and
- 161639 to restore genomicstart for substring2 of insertions and deletions,
- and to change Substring_convert_to_pairs instead
+ * substring.c, substring.h: Substring_convert_to_pairs now takes
+ genomicstart_indel_adj
+
+ * stage3hr.c: No longer changing left2, genomicstart2, and genomicend2 for
+ substring2 of insertions and deletions. Providing indel adjustments
+ instead to Substring_convert_to_pairs.
+
+ * pair.c: Made Pairarray_contains_p routine look for any case of a gap or
+ indel for a given querypos
+
+ * stage3hr.c: In adjust_hardclips, for dual GMAP, added the ability to shift
+ low_querypos or high_querypos independently to make the low genomicpos and
+ high genomicpos equal.
- * public-2014-12-17: Updated version number
+ * stage3hr.c: In test_hardclips, for dual GMAP, checking that the
+ coordinates match for the two ends
- * pair.c, src: Merged revision 161633 from trunk to make
- Pairarray_contains_p routine look for any gap or indel
+ * stage3hr.c: On recomputing of hardclips near center, decrementing the
+ higher value to make the clipping more even
- * public-2014-12-17, src, stage3hr.c: Merged revisions 161606 and 161608
- from trunk to adjust low_querypos and high_querypos independently
+ * stage3hr.c: Fixed bug in defining left2 for deletion
- * public-2014-12-17, src, stage3hr.c: Merged revision 161603 from trunk to
- make overlap clipping more even
+ * stage3hr.c: In find_ilengths, returning false instead of aborting
- * src, stage3hr.c: Merged revision 161600 from trunk to fix bug in defining
- left2 for deletion
+ * VERSION: Updated version number
+
+ * list.c, list.h: Implemented List_pop_out
+
+ * substring.c: Fixed genomic coordinates to be 0-based when converting from
+ substrings to pairs
- * VERSION, list.c, list.h, pair.c, pair.h, public-2014-12-17, src,
- stage3hr.c, substring.c: Merged revisions 161197 through 161596 from trunk
- to fix --clip-overlap and --merge-overlap
+ * stage3hr.c: In test_hardclips, fixed bug with uninitialized values. In
+ adjust_hardclips, checking querypos, querypos-1, and querypos+1 again.
+ Also, for dual GMAP, checking that genomepos matches for the given
+ low_querypos and high_querypos, meaning that alignments are similar.
+ Always doing a recompute of ilengths after adjust_hardclips. Implemented
+ stripping of gaps and indels that occur between the two parts when doing a
+ merge overlap.
+
+2015-03-20 twu
+
+ * stage3hr.c: Subtracting 1 from alignstart or alignend in computing
+ overlaps. The find_ilengths function returns false if a common point is
+ not found. Added a test_hardclips step and separate right and left shifts
+ for adjust_hardclip. Computing a separate genomicstart2 for substring2 of
+ insertions and deletions.
+
+ * pair.c, pair.h: Implemented Pairarray_lookup
+
+2015-03-19 twu
+
+ * stage3hr.c: Computing second hardclip from its ilength, not overlap. In
+ finding common point involving GMAP, skipping introns and indels. Added
+ code to check that merged overlap pieces are next to each other.
2015-03-18 twu
- * public-2014-12-17, src, stage3hr.c: Merged revision 161197 from trunk to
- fix a fatal bug in adjust_hardclips
+ * stage3hr.c: Fixed bug in some of the initial loops of adjust_hardclips
+
+ * splice.c, stage1hr.c: Using only sensedir and not sensep in calling
+ Substring_new_donor, acceptor, and shortexon
+
+ * stage3hr.c, substring.c, substring.h: Removed unused variables and
+ parameters. Using sensedir instead of sensep.
+
+2015-03-17 twu
+
+ * samprint.c: Removed unused parameters and variables
+
+ * substring.c, substring.h: Making Substring_print_shortexon use sensedir
+ instead of sensep. Removed unused parameters.
+
+ * stage3hr.c: Calling Substring_print_donor, acceptor, and shortexon
+ procedures with sensedir instead of sensep
+
+ * pair.c, pair.h: Removed unused parameters
+
+ * VERSION: Updated version number
+
+ * output.c: Using new interface to SAM_compute_chrpos
+
+ * samprint.c, samprint.h: Corrected calculations in SAM_compute_chrpos
- * VERSION, public-2014-12-17, src: Updated version number
+ * stage3hr.c, stage3hr.h: Using substring_LtoH instead of substring_low and
+ substring_high. Added initial shift in adjust_hardclips. Fixed
+ calculation of overlap to depend only on common_left and common_right.
- * samprint.c, stage3hr.c, stage3hr.h, substring.c, substring.h: Merged
- revisions 160877 through 161118 from trunk to undo use of
- substring_hardclipped, and to use substring_LtoH instead
+ * substring.c, substring.h: Changed Substring_chrstart and Substring_chrend
+ to Substring_alignstart_chr and Substring_alignend_chr
+
+ * output.c, samprint.c, samprint.h: Did a reverse merge to undo revision
+ 160876 which used substring_hardclipped instead of substring_low
2015-03-13 twu
- * outbuffer.c, samprint.c, samprint.h: Applied revision 160876 from trunk to
- change SAM_compute_chrpos to search for the hardclipped substring, rather
- than using substring_low
+ * VERSION: Updated version number
+
+ * output.c, samprint.c, samprint.h: Revised SAM_compute_chrpos to search for
+ the hardclipped substring, rather than using substring_low
+
+ * stage3hr.c: Changed comment
* shortread.c: Initializing nextchar2 in various procedures
- * stage3hr.c: Applied revision 160755 from trunk to restore correct ilength
- calculations for minus strand and adjust hardclips by checking adjacent
- positions left and right of the crossover querypos.
+ * gsnap.c: Fixed small memory leak
+
+2015-03-11 twu
+
+ * stage3hr.c: Adjusting hardclips by checking adjacent positions left and
+ right of the crossover querypos.
+
+ * substring.c: Removed comment
+
+ * stage3hr.c: Restored correct ilength calculations for minus strand
2015-03-06 twu
- * stage3hr.c: Fixed bampair_sort_cmp to eliminate duplicates that were not
- being removed
+ * VERSION, config.site.rescomp.prd, config.site.rescomp.tst, index.html:
+ Updated version number
+
+ * stage3hr.c: Added comparisons in hitpair_sort_cmp to fix issue where
+ duplicate alignments were not being put together for removal
- * stage3hr.c: Fixes made to computation of overlap
+ * oligoindex_hr.c: Implemented bit twiddling and SIMD-based method for
+ computing reverse_nt
2015-03-03 twu
- * VERSION: Updated version number
+ * stage3.c: Removed automatic trimming of ends less than 12 bp. Fixed bug
+ in assigning splice pair in end trimming procedures.
+
+ * ax_ext.m4: Performing run test for tzcnt_u32 and tzcnt_u64
+
+ * stage3hr.c: Made minor fixes in --clip-overlap feature, including fixes to
+ gaps and overlaps, more even division of overlaps, and preference for
+ clipping heads rather than tails in cases of ties
+
+ * stage3.c: Turning off branch that can lead to bad CIGAR strings
+
+ * inbuffer.c: Defining variable needed when MPI_FILE_INPUT is specified
+
+ * gsnap.c: Doing a chromosome_iit_setup before worker_setup
+
+ * genome128_hr.c: Using HAVE_TZCNT instead of HAVE_BMI1
+
+2015-02-25 twu
+
+ * stage1hr.c, stage3hr.c, stage3hr.h: Printing an accession when reporting a
+ CIGAR error
- * ax_ext.m4: Checking for tzcnt to run successfully
+ * inbuffer.c, inbuffer.h: Changed nspaces to be an unsigned int
- * genome128_hr.c: Changed from HAVE_BMI1 to HAVE_TZCNT
+ * gsnap.c: Moved pthread_attr_init to places just before they are needed
- * stage3.c: Turned off branch that can lead to bad CIGAR strings
+ * Makefile.gsnaptoo.am: Added master.c and master.h as extra files to be
+ distributed
+
+ * master.c: Added pre-processor macros
+
+ * gsnap.c: Added pre-processor macro around inclusion of master.h
+
+ * Makefile.gsnaptoo.am, VERSION, config.site.rescomp.prd,
+ config.site.rescomp.tst, filestring.c, filestring.h, gsnap.c, inbuffer.c,
+ inbuffer.h, index.html, master.c, master.h, mpidebug.c, mpidebug.h, src,
+ trunk, util: Merged revisions 158119 through 159424 from
+ branches/2015-02-05-mpi-workers-0 to allow for worker threads in rank 0
2015-02-12 twu
- * VERSION, chimera.c, pair.c, pair.h, public-2014-12-17, src: Applied patch
- 158533 from trunk for GMAP chimeras. Adding a pre-extension slop in
- finding paths to pair, but not when finding a breakpoint between the final
- paths.
+ * gmap.c: Added debugging statements
+
+ * chimera.c, pair.c, pair.h: Providing Pair_pathscores with a
+ pre_extension_slop parameter. Distinguishing between call to
+ Pair_pathscores when finding non-extended paths to pair up, and when
+ finding a breakpoint between the final, extended paths.
+
+ * outbuffer.c: Rearranged procedures for compilation to work
+
+ * pair.c: In Pair_print_sam, always doing a Pair_compute_cigar
+
+ * outbuffer.c: Printing SAM headers on empty files
2015-02-10 twu
- * stage1.c: Merging revision 158350 from trunk to limit number of results in
- find_range to 100 to avoid getting bogged down in repeats
+ * gmap.c: Allowing PMAP to have variables for gff3_separators_p
+
+ * gmap.c, gsnap.c, pair.c, pair.h, uniqscan.c: For gff3 output, always
+ adding a separator line. Added --gff3-add-separators flag to GMAP.
- * gmap.c, gsnap.c, pair.c, pair.h, uniqscan.c: Merging revision 158352 from
- trunk to always add a separator line for gff3 output. Added
- --gff3-add-separators flag to GMAP.
+ * stage1.c: In find_range, limiting number of results to 100 to avoid
+ getting bogged down in repeats
- * gff3_genes.pl.in, gff3_introns.pl.in, gff3_splicesites.pl.in, util: Merged
- revision 158348 from trunk to always read chr from line for gff3 files
- without a gene name
+ * gff3_genes.pl.in, gff3_introns.pl.in, gff3_splicesites.pl.in: For gff3
+ files without a gene name, always read $chr from line
+
+2015-02-05 twu
+
+ * pair.c: For GMAP, always recomputing cigar_tokens, in case merging has
+ affected them
2015-02-04 twu
- * VERSION, public-2014-12-17, src: Updated version number
+ * pair.c: Added slop in computing Pair_pathscores, to allow for better
+ identification of translocations
+
+ * gmap.c: Improved debugging statements
- * pair.c: Applied patch 158025 to add merge slop in Pair_pathscores to allow
- better identification of merges
+ * chimera.c: Changed type of some debugging statements
2015-02-03 twu
- * VERSION, gmap.c, pair.c, pair.h, public-2014-12-17, src, stage1hr.c,
- stage3.c, stage3.h: Merged revisions 157718 to 157789 from trunk to
- compute goodness for Stage3_T objects every time pairarray is generated
+ * VERSION, gmap.c, gsnap.c, pair.c, pair.h, samprint.c, src, stage3.c,
+ stage3hr.c, stage3hr.h, trunk, uniqscan.c: Merged revisions 157793 through
+ 157918 from branches/2015-01-30-cigar-check to create and check cigar
+ strings when Stage3_T or Stage3end_T objects are created
+
+2015-01-30 twu
+
+ * stage1hr.c: Using new interface to Stage3_compute
+
+ * gmap.c: Using new interface to Stage3_compute and Stage3_new. No longer
+ calling Stage3_recompute_goodness.
+
+ * pair.c, pair.h: Implemented Pair_fracidentity_array, which returns
+ goodness
+
+ * stage3.h: Changed Stage3_recompute_goodness to Stage3_compute_mapq.
+ Always recomputing matches and goodness when this->pairarray is assigned.
+ Removed references to END_KNOWNSPLICING_SHORTCUT.
+
+ * stage3.c: Changed Stage3_recompute_goodness to Stage3_compute_mapq.
+ Always recomputing matches and goodness when this->pairarray is assigned.
+ Removed references to END_KNOWNSPLICING_SHORTCUT.
2015-01-29 twu
- * outbuffer.c: Moved mutex of locks outside of loops to clear out backlog in
- Outbuffer_thread_anyorder and Outbuffer_thread_ordered
+ * stage3.c: In Stage3_cmp, using npairs and matches as secondary criteria
+ beyond goodness
+
+ * gmap.c: Cleaned up unused variables and parameters. Using new interface
+ to Stage3_compute
+
+ * filestring.c: Added ability to handle %f
+
+ * stage3.c, stage3.h: Cleaned up unused variables and parameters
+
+ * stage1hr.c: Using new interface to Stage3_compute
+
+ * pair.c: Using false instead of 0
+
+2015-01-28 twu
+
+ * gmap.c: Added call to Outbuffer_cleanup()
+
+ * outbuffer.c: Moved lock outside of loop to prevent a race condition
+
+ * inbuffer.c: Removed check of nextchar == EOF, which causes standard GSNAP
+ and GMAP not to terminate
+
+ * shortread.c, shortread.h: Fixed some issues with variable names for MPI
+ code
+
+ * outbuffer.c, outbuffer.h: Added Outbuffer_cleanup, which frees array of
+ outputs
+
+ * inbuffer.c: Allowing for gzipped and bzip2-compressed files in MPI version
+ by sending and receiving filecontents
+
+ * gsnap.c: Calling Outbuffer_cleanup
+
+ * gmap.c: Revealed variable needed for debugging
+
+ * filestring.c, filestring.h: Implemented Filestring_send and
+ Filestring_recv
+
+ * compress.c: Fixed comment
+
+2015-01-27 twu
+
+ * shortread.c: Made code consistent across text, gzip and bzip2. Added
+ hooks for filling a Filestring_T object in gzip and bzip2 procedures.
+
+2015-01-26 twu
+
+ * index.html: Updated for 2014-12-17.v2
+
+ * shortread.c, shortread.h: Using workers_comm in MPI_fopen
+
+ * mpidebug.c, mpidebug.h: Using workers_comm in MPI_fopen
+
+ * inbuffer.c, inbuffer.h: Passing workers_comm to
+ Shortread_read_filecontents
- * stage3.c: Applied revision 157718 from trunk to use npairs and matches as
- secondary criteria beyond goodness in Stage3_cmp
+ * gsnap.c: Introduced a workers_comm so MPI_File_open and MPI_File_close can
+ be restricted to that group
- * stage3.c: Fixed wrong variable name in call to score_introns
+ * shortread.c: Added debugging statements for opening and closing files
+
+ * gsnap.c: Added debugging statements for opening and closing files. For
+ MPI master using MPI_File input, explicitly closing those inputs.
+
+ * gsnap.c: Using new interfaces to Inbuffer_setup, Inbuffer_new, and
+ Inbuffer_master_process. Master rank 0 no longer calling Inbuffer_new.
+
+ * gmap.c: Using new interface to Inbuffer_new
+
+ * inbuffer.c: No longer making a special case in fill_buffer for MPI when
+ nextchar at end of block is EOF.
+
+ * shortread.c, shortread.h: MPI procedures for reading from filecontents
+ also close and open input files
+
+ * inbuffer.h: Moved nspaces into Inbuffer_T object and into Inbuffer_new
+ instead of Inbuffer_setup. Made Inbuffer_master_process independent of any
+ Inbuffer_T object.
+
+ * inbuffer.c: Moved nspaces into Inbuffer_T object and into Inbuffer_new
+ instead of Inbuffer_setup. Made Inbuffer_master_process independent of any
+ Inbuffer_T object.
+
+2015-01-23 twu
+
+ * inbuffer.c: Added comments
+
+ * gsnap.c: Created separate worker_setup and worker_cleanup procedures
+
+ * inbuffer.c: Assigning filecontents buffers to the IN category for memusage
+
+ * config.site.rescomp.prd, gsnap.c, inbuffer.c, inbuffer.h, shortread.c,
+ shortread.h, src, trunk: Merged revisions 157242 to 157253 from
+ branches/2015-01-22-mpi-file-block to have worker ranks read blocks into a
+ buffer
2015-01-22 twu
* memchk.c, popcount.c: Added include of config.h
- * VERSION, bitpack64-read.h, bitpack64-serial-read.h, compress.h, dynprog.h,
+ * configure.ac: Changed variable name from USE_MPI_FILE to
+ USE_MPI_FILE_INPUT
+
+ * samheader.h: Added include of <mpi.h>
+
+ * oligoindex_pmap.h: Added explanation of why config.h needs to be included
+
+ * iit-read-univ.h: Added include of <mpi.h>
+
+ * gsnap.c, inbuffer.c, inbuffer.h, shortread.c, shortread.h: Checking for
+ both USE_MPI and USE_MPI_FILE_INPUT in using MPI_File for input
+
+ * chrsubset.h: Added blank line for formatting
+
+ * bitpack64-read.h, bitpack64-serial-read.h, compress.h, dynprog.h,
except.h, genomicpos.h, iit-read.h, indexdb-write.h, indexdb.h,
- indexdbdef.h, oligoindex_hr.h, popcount.h, public-2014-12-17, samprint.h,
- sequence.h, sortinfo.h, src: Merged revisions 157224 and 157225 from trunk
- to remove and add config.h from header files
+ indexdbdef.h, popcount.h, sequence.h: Added include of config.h
-2015-01-16 twu
+ * filestring.h, mpidebug.h, oligoindex_hr.h, samprint.h, sortinfo.h: Removed
+ include of config.h, since not necessary
+
+ * bigendian.h, fopen.h, iitdef.h, littleendian.h, mem.h, oligoindex.h,
+ types.h: Added explanation of why config.h needs to be included
+
+ * atoi.h, bitpack64-write.h, cmet.h: Added $Id$ string
+
+ * access.h, alphabet.h, backtranslation.h, block.h, boyer-moore.h,
+ bp-read.h, bp-write.h, bytecoding.h, bzip2.h, chrom.h, chrsegment.h,
+ datadir.h, diag.h, diagdef.h, diagpool.h, genome-write.h,
+ genome128-write.h, genome_hr.h, genome_sites.h, genomepage.h, gregion.h,
+ iit-write-univ.h, iit-write.h, indel.h, indexdb_hr.h, interval.h,
+ intlist.h, intpool.h, intron.h, match.h, matchdef.h, matchpool.h,
+ maxent128_hr.h, maxent_hr.h, oligo.h, oligop.h, pairdef.h, parserange.h,
+ reader.h, stage1.h, tableuint8.h, tally.h, translation.h, univinterval.h:
+ Added blank line for formatting
+
+2015-01-21 twu
+
+ * stage3hr.c: Turning on SOFT_CLIPS_AVOID_CIRCULARIZATION again to avoid
+ duplicates in circular chromosomes
+
+ * ax_mpi.m4: Added cc to list of possible values for MPICC, for systems that
+ use a wrapper called cc
+
+ * shortread.c: Fixed parsing issues for blank lines and ends of files
+
+ * configure.ac: Added configure flag --enable-mpi-file
+
+ * Makefile.gsnaptoo.am: Removed mpi_gmap for now
+
+ * gsnap.c, pair.c, pair.h: Added noprint option for --action-if-cigar-error
+ and made it the default
+
+ * gsnap.c, inbuffer.c: Made -q or --part flag work for MPI code
+
+ * inbuffer.c: Added ending brace for MPI code
+
+ * shortread.c: Fixed bug in a print statement where a pointer was not being
+ provided. In input_oneline, making a single read to get nextchar.
+
+ * inbuffer.c: Not doing fseek if nextchar is EOF
+
+ * gsnap.c: Removed a debugging statement
+
+ * filestring.c: Increased size of buffer
+
+ * outbuffer.c, outbuffer.h: Added parameter for output_file
+
+ * gmap.c: Using new interface to Outbuffer_setup and
+ Outbuffer_print_filestrings
+
+ * samheader.c, samheader.h: Applied changes from branches/2015-01-17-mpi-seq
+
+ * outbuffer.c, outbuffer.h: Applied changes from
+ branches/2015-01-17-mpi-seq. Removed code for Outbuffer_mpi_process.
+
+ * inbuffer.c: Removed requestid variable from fill_buffer for GMAP
+
+ * iit-read-univ.c, iit-read-univ.h: Applied changes from
+ branches/2015-01-17-mpi-seq
+
+ * gsnap.c: Applied changes from branches/2015-01-17-mpi-seq
+
+ * gmap.c: Put in dummy variables for Inbuffer_new
+
+ * filestring.c, filestring.h: Applied changes from
+ branches/2015-01-17-mpi-seq
+
+2015-01-20 twu
- * gmap.c, pair.c, public-2014-12-17, src, stage2.c, stage3.c: Merged
- revision 156845 from trunk to make better decisions for last exons having
- partial alignments
+ * config.site.rescomp.tst, filestring.c, filestring.h, gsnap.c, inbuffer.c,
+ inbuffer.h, mpidebug.c, mpidebug.h, outbuffer.c, shortread.c, shortread.h,
+ src, trunk: Merged revisions 156908 to 157083 from
+ branches/2015-01-17-mpi-seq to change the input side of mpi_gsnap
- * public-2014-12-17: Created release branch from public-2014-12-16
+ * index.html: Updated for version 2014-12-17
+
+ * VERSION: Updated version number
+
+ * samprint.c: Consolidated print statements
+
+ * output.c: Defining abbrev for a nomapper
+
+ * diag.c: Added debugging statement
+
+2015-01-16 twu
+
+ * gmap.c, pair.c, stage2.c, stage3.c: Merged revisions 156824 to 156843 from
+ branches/2015-01-15-fix-chimeras to make better decisions for last exons
+ having partial alignments
2015-01-15 twu
- * oligoindex_hr.c: Applied patch 156816 from trunk to allow all diagonals in
- Oligoindex_get_mappings
+ * oligoindex_hr.c: Allowing diagonals where ptr->i < querylength. Reveals
+ alignments that were otherwise missed.
- * chimera.c, chimera.h, gmap.c: Applied patch 156811 from trunk to fix
- non-exon-exon breakpoint and dinucleotides
+ * gmap.c: Fixed debugging statements to use Sequence_stdout instead of
+ Sequence_print
-2015-01-14 twu
+ * chimera.c, chimera.h, gmap.c: Fixed algorithm for finding non-exon-exon
+ chimeric breakpoint and finding dinucleotides
- * stage2.c: Applied patch 156104 from trunk to fix uninitialized variable
- for firstactive
+2015-01-14 twu
* stage3hr.c: In anomalous_splice_p procedures, checking for samechr_splice
hittypes
- * stage1hr.c: Not running GMAP on samechr_splice hittypes
+ * stage1hr.c: Not applying GMAP to samechr_splice hittypes
+
+2015-01-07 twu
+
+ * oligoindex_hr.c: Fixed type for positions_space field in Oligoindex_T
+
+ * oligoindex_hr.c: Fixed type for positions_space field in Oligoindex_T
+
+ * oligoindex_hr.c, oligoindex_hr.h, oligoindex_old.c, oligoindex_old.h, src,
+ stage2.c, trunk: Merged revisions 154793 through 156263 from
+ branches/2014-12-06-stage2-larger-kmers to allow for 9-mers in stage 2
+
+ * config.site.rescomp.prd, config.site.rescomp.tst: Updated version number
+
+ * VERSION: Updated version number
+
+ * index.html: Added changes for version 2014-12-16 (v2)
+
+ * substring.c: Fixed assertions to account for out-of-bounds regions
+
+ * README: Added explanation of XI field
+
+ * pair.c, samprint.c, shortread.c, shortread.h: Added code for XI field
- * stage1hr.c: Applied patch 156105 from trunk to use correct typecast of
- ambcoords to (Uint8list_T) NULL for large genomes
+2015-01-05 twu
+
+ * stage1hr.c: Using correct typecast of ambcoords to (Uint8list_T) NULL for
+ large genomes
+
+ * stage2.c: Fixed uninitialized variable for firstactive
+
+2014-12-17 twu
+
+ * gsnap.c, uniqscan.c: Using new interface to Stage3hr_setup
+
+ * stage3hr.c, stage3hr.h: Computing outofbounds_left and outofbounds_right.
+ Using new interface to Substring_new.
+
+ * substring.c, substring.h: Added provision for outofbounds_left and
+ outofbounds_right, to be considered part of trimming
2014-12-16 twu
- * sarray-read.c: Applied patch 155495 to fix typo of spliceends_antisense to
- spliceends_sense
+ * gsnap.c: Changed input sequence: first open the input streams and read one
+ character to determine whether the input is FASTQ format, then call
+ Shortread_setup, and then fill the inbuffer.
- * samheader.c: Applied patch 155409 to not print tabs if there are no
- headers
+ * sarray-read.c: Fixed typo: spliceends_antisense => spliceends_sense
+
+ * substring.c: Removed debugging statement
+
+ * samheader.c: Not printing tabs if there are no headers
+
+ * sam_sort.c: Setting fileposition variable for each file
+
+ * filestring.c: Handling the case where filestring is NULL
+
+2014-12-12 twu
+
+ * doublelist.c: Fixed type error in doublelist_to_array_out
+
+ * Makefile.gsnaptoo.am, config.site.rescomp.prd, gsnap.c, samprint.c, src,
+ stage1hr.c, stage1hr.h, stage3hr.c, substring.c, substring.h, trunk,
+ uniqscan.c: Merged revisions 154499 through 155289 from
+ branches/2014-12-03-dna-chimeras
+
+ * VERSION, config.site.rescomp.prd: Updated version number
+
+ * sam_sort.c: Revised sam_sort to handle multiple input files
+
+ * Makefile.am, Makefile.gsnaptoo.am, VERSION, access.c, ax_mpi.m4,
+ backtranslation.c, backtranslation.h, bool.h, bootstrap.gsnaptoo,
+ chimera.c, chimera.h, config.site, config.site.rescomp.prd,
+ config.site.rescomp.tst, configure.ac, filestring.c, filestring.h,
+ genomicpos.c, genomicpos.h, get-genome.c, gmap.c, gsnap.c,
+ iit-read-univ.c, iit-read-univ.h, iit-read.c, iit-read.h, inbuffer.c,
+ inbuffer.h, md5.c, md5.h, mem.c, mem.h, memory-check.pl, mpi, mpidebug.c,
+ mpidebug.h, outbuffer.c, outbuffer.h, output.c, output.h, pair.c, pair.h,
+ request.c, request.h, resulthr.c, resulthr.h, revcomp.c, sam_sort.c,
+ samflags.h, samheader.c, samheader.h, samprint.c, samprint.h,
+ sarray-read.c, segmentpos.c, segmentpos.h, sequence.c, sequence.h,
+ shortread.c, shortread.h, src, stage1hr.c, stage2.c, stage2.h, stage3.c,
+ stage3.h, stage3hr.c, stage3hr.h, substring.c, substring.h, translation.c,
+ translation.h, trunk, types.h, uniqscan.c: Merged revisions 154226 to
+ 155279 from branches/2014-11-27-mpi to implement MPI versions and to use
+ Filestring_T objects for all output
+
+ * VERSION, config.site.rescomp.prd, index.html: Updated version number
+
+ * genome.c, genome.h: Changed type of gbuffer from unsigned char to char
- * doublelist.c: Applied patch 155291 to fix type
+2014-12-10 twu
- * public-2014-12-16: Created release version from revision 154791
+ * oligoindex_hr.c: Added code for handling 9-mers
2014-12-06 twu
diff --git a/Makefile.am b/Makefile.am
index 9c69a2a..4e9c4d5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,5 +1,5 @@
-SUBDIRS = src util tests
+SUBDIRS = src mpi util tests
EXTRA_DIST = VERSION NOTICE config.site
diff --git a/Makefile.in b/Makefile.in
index d6f6e00..c92bdf8 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -37,7 +37,7 @@ target_triplet = @target@
subdir = .
DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in $(top_srcdir)/configure AUTHORS COPYING \
- ChangeLog INSTALL NEWS config/compile config/config.guess \
+ ChangeLog INSTALL NEWS TODO config/compile config/config.guess \
config/config.sub config/depcomp config/install-sh \
config/ltmain.sh config/missing
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -150,9 +150,6 @@ EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GMAPDB = @GMAPDB@
-GOBY_CFLAGS = @GOBY_CFLAGS@
-GOBY_LDFLAGS = @GOBY_LDFLAGS@
-GOBY_LIBS = @GOBY_LIBS@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
@@ -172,6 +169,7 @@ MAX_READLENGTH = @MAX_READLENGTH@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
+MPI_CFLAGS = @MPI_CFLAGS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
@@ -255,7 +253,7 @@ target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-SUBDIRS = src util tests
+SUBDIRS = src mpi util tests
EXTRA_DIST = VERSION NOTICE config.site
CLEANFILES = so_locations
ACLOCAL_AMFLAGS = -I config
diff --git a/README b/README
index 98274ba..9e7ee80 100644
--- a/README
+++ b/README
@@ -815,6 +815,10 @@ XH: Prints the part of the query sequence that was hard-clipped.
Sequence is printed in plus-genomic order and replaces the "H" part of
the CIGAR string.
+XI: Prints the part of the quality string that was hard-clipped.
+Quality string is printed in plus-genomic order and corresponds to the
+"H" part of the CIGAR string.
+
XS: Prints the strand orientation (+ or -) for a splice. Appears only
if splicing is allowed (-N or -s flag provided), and only for reads
containing a splice. The value "+" means the expected GT-AG, GC-AG,
@@ -859,11 +863,12 @@ genome, and with the exception of indels, should equal the value of
NM.
XG: Indicates which method within GSNAP generated the alignment. A:
-suffix array method, T: terminal alignment, M: GMAP method, O: merging
-of overlaps. Absence of XG flag indicates the standard GSNAP hash
-table method. (Note: older versions of GSNAP used "PG:", but some
-downstream software required all PG methods to be listed in the header
-section, so we changed the field name to "XG:")
+suffix array method, B: GMAP alignment produced from suffix array, M:
+GMAP alignment produced from GSNAP hash table method, T: terminal
+alignment, O: merging of overlaps. Absence of XG flag indicates the
+standard GSNAP hash table method. (Note: older versions of GSNAP used
+"PG:", but some downstream software required all PG methods to be
+listed in the header section, so we changed the field name to "XG:")
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..c5a7bd7
--- /dev/null
+++ b/TODO
@@ -0,0 +1,3 @@
+
+Add flag that allows for splitting afterwards.
+
diff --git a/VERSION b/VERSION
index ab18138..00ee667 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2014-12-29
\ No newline at end of file
+2015-06-10
\ No newline at end of file
diff --git a/config.site b/config.site
index 95a8478..41c93c5 100644
--- a/config.site
+++ b/config.site
@@ -100,6 +100,11 @@
## CFLAGS='-O3 -m64' (for Macintosh machines)
+## Debugging and optimization options for the MPICC compiler, used
+## for the MPI versions of the programs.
+## MPI_CFLAGS='-mpitrace'
+
+
## Search directory for header files ('-IDIR') and any other
## miscellaneous options for the C preprocessor and compiler.
## CPPFLAGS=-I/usr/local/include
diff --git a/config/ax_mpi.m4 b/config/ax_mpi.m4
index 5b2322c..5b4921a 100755
--- a/config/ax_mpi.m4
+++ b/config/ax_mpi.m4
@@ -72,7 +72,7 @@ AC_PREREQ(2.50) dnl for AC_LANG_CASE
AC_LANG_CASE([C], [
AC_REQUIRE([AC_PROG_CC])
AC_ARG_VAR(MPICC,[MPI C compiler command])
- AC_CHECK_PROGS(MPICC, mpicc hcc mpxlc_r mpxlc mpcc cmpicc, $CC)
+ AC_PATH_PROGS(MPICC, mpicc hcc mpxlc_r mpxlc mpcc cmpicc cc)
ax_mpi_save_CC="$CC"
CC="$MPICC"
AC_SUBST(MPICC)
@@ -80,7 +80,7 @@ AC_LANG_CASE([C], [
[C++], [
AC_REQUIRE([AC_PROG_CXX])
AC_ARG_VAR(MPICXX,[MPI C++ compiler command])
- AC_CHECK_PROGS(MPICXX, mpic++ mpicxx mpiCC hcp mpxlC_r mpxlC mpCC cmpic++, $CXX)
+ AC_PATH_PROGS(MPICXX, mpic++ mpicxx mpiCC hcp mpxlC_r mpxlC mpCC cmpic++, $CXX)
ax_mpi_save_CXX="$CXX"
CXX="$MPICXX"
AC_SUBST(MPICXX)
@@ -88,7 +88,7 @@ AC_LANG_CASE([C], [
[Fortran 77], [
AC_REQUIRE([AC_PROG_F77])
AC_ARG_VAR(MPIF77,[MPI Fortran 77 compiler command])
- AC_CHECK_PROGS(MPIF77, mpif77 hf77 mpxlf_r mpxlf mpf77 cmpifc, $F77)
+ AC_PATH_PROGS(MPIF77, mpif77 hf77 mpxlf_r mpxlf mpf77 cmpifc, $F77)
ax_mpi_save_F77="$F77"
F77="$MPIF77"
AC_SUBST(MPIF77)
@@ -96,7 +96,7 @@ AC_LANG_CASE([C], [
[Fortran], [
AC_REQUIRE([AC_PROG_FC])
AC_ARG_VAR(MPIFC,[MPI Fortran compiler command])
- AC_CHECK_PROGS(MPIFC, mpif90 mpxlf95_r mpxlf90_r mpxlf95 mpxlf90 mpf90 cmpif90c, $FC)
+ AC_PATH_PROGS(MPIFC, mpif90 mpxlf95_r mpxlf90_r mpxlf95 mpxlf90 mpf90 cmpif90c, $FC)
ax_mpi_save_FC="$FC"
FC="$MPIFC"
AC_SUBST(MPIFC)
diff --git a/configure b/configure
index 7ba6b5b..9689771 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2014-12-29.
+# Generated by GNU Autoconf 2.63 for gmap 2015-06-10.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2014-12-29'
-PACKAGE_STRING='gmap 2014-12-29'
+PACKAGE_VERSION='2015-06-10'
+PACKAGE_STRING='gmap 2015-06-10'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -790,9 +790,6 @@ ac_subst_vars='am__EXEEXT_FALSE
am__EXEEXT_TRUE
LTLIBOBJS
LIBOBJS
-GOBY_LIBS
-GOBY_LDFLAGS
-GOBY_CFLAGS
BZLIB_LIBS
ZLIB_LIBS
MAX_READLENGTH
@@ -824,6 +821,8 @@ EGREP
GREP
SED
LIBTOOL
+MPI_FOUND_FALSE
+MPI_FOUND_TRUE
MPILIBS
MPICC
PERL
@@ -882,6 +881,7 @@ build_os
build_vendor
build_cpu
build
+MPI_CFLAGS
CFLAGS
target_alias
host_alias
@@ -933,6 +933,7 @@ with_pic
enable_fast_install
with_gnu_ld
enable_libtool_lock
+enable_mpi_file_input
enable_pthreads
enable_alloca
enable_mmap
@@ -947,12 +948,12 @@ enable_simd
with_gmapdb
enable_zlib
enable_bzlib
-with_goby
'
ac_precious_vars='build_alias
host_alias
target_alias
CFLAGS
+MPI_CFLAGS
CC
LDFLAGS
LIBS
@@ -1512,7 +1513,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2014-12-29 to adapt to many kinds of systems.
+\`configure' configures gmap 2015-06-10 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1583,7 +1584,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2014-12-29:";;
+ short | recursive ) echo "Configuration of gmap 2015-06-10:";;
esac
cat <<\_ACEOF
@@ -1601,6 +1602,9 @@ Optional Features:
--enable-fast-install[=PKGS]
optimize for fast installation [default=yes]
--disable-libtool-lock avoid locking (might break parallel builds)
+ --enable-mpi-file-input Enable MPI_File for input for MPI versions
+ (default=yes). Note: Use only if you have a parallel
+ I/O file system, like Lustre
--enable-pthreads Enable pthreads (default=yes)
--enable-alloca Enable alloca (default=yes) for stack-based memory
allocation.
@@ -1636,11 +1640,10 @@ Optional Packages:
both]
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
--with-gmapdb=DIR Default GMAP database directory
- --with-goby=DIR Location of Goby header files (in DIR/include) and
- library files (in DIR/lib) (optional)
Some influential environment variables:
CFLAGS Compiler flags (default: -O3)
+ MPI_CFLAGS Compiler flags (default: -O3)
CC C compiler command
LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
nonstandard directory <lib dir>
@@ -1718,7 +1721,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2014-12-29
+gmap configure 2015-06-10
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1732,7 +1735,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2014-12-29, which was
+It was created by gmap $as_me 2015-06-10, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2102,8 +2105,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2014-12-29" >&5
-$as_echo "2014-12-29" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2015-06-10" >&5
+$as_echo "2015-06-10" >&6; }
### Read defaults
@@ -2192,6 +2195,12 @@ else
$as_echo "$CFLAGS" >&6; }
fi
+{ $as_echo "$as_me:$LINENO: checking MPI_CFLAGS" >&5
+$as_echo_n "checking MPI_CFLAGS... " >&6; }
+
+{ $as_echo "$as_me:$LINENO: result: $MPI_CFLAGS" >&5
+$as_echo "$MPI_CFLAGS" >&6; }
+
@@ -3660,6 +3669,7 @@ program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
#AM_INIT_AUTOMAKE([no-dependencies])
+#AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
am__api_version='1.11'
# Find a good install program. We prefer a C program (faster),
@@ -4161,8 +4171,8 @@ fi
# Define the identity of the package.
- PACKAGE=gmap
- VERSION=2014-12-29
+ PACKAGE='gmap'
+ VERSION='2015-06-10'
cat >>confdefs.h <<_ACEOF
@@ -5288,26 +5298,29 @@ fi
- for ac_prog in mpicc hcc mpxlc_r mpxlc mpcc cmpicc
+
+ for ac_prog in mpicc hcc mpxlc_r mpxlc mpcc cmpicc cc
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_MPICC+set}" = set; then
+if test "${ac_cv_path_MPICC+set}" = set; then
$as_echo_n "(cached) " >&6
else
- if test -n "$MPICC"; then
- ac_cv_prog_MPICC="$MPICC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+ case $MPICC in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_MPICC="$MPICC" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
- ac_cv_prog_MPICC="$ac_prog"
+ ac_cv_path_MPICC="$as_dir/$ac_word$ac_exec_ext"
$as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
@@ -5315,9 +5328,10 @@ done
done
IFS=$as_save_IFS
+ ;;
+esac
fi
-fi
-MPICC=$ac_cv_prog_MPICC
+MPICC=$ac_cv_path_MPICC
if test -n "$MPICC"; then
{ $as_echo "$as_me:$LINENO: result: $MPICC" >&5
$as_echo "$MPICC" >&6; }
@@ -5329,7 +5343,6 @@ fi
test -n "$MPICC" && break
done
-test -n "$MPICC" || MPICC="$CC"
ax_mpi_save_CC="$CC"
CC="$MPICC"
@@ -5638,6 +5651,16 @@ _ACEOF
:
fi
# Sets MPICC to use for isolated source files that need it
+# AC_PROG_CC_MPI # This sets CC to mpicc
+
+ if test "x$MPILIBS" != x; then
+ MPI_FOUND_TRUE=
+ MPI_FOUND_FALSE='#'
+else
+ MPI_FOUND_TRUE='#'
+ MPI_FOUND_FALSE=
+fi
+
case `pwd` in
*\ * | *\ *)
@@ -6258,13 +6281,13 @@ if test "${lt_cv_nm_interface+set}" = set; then
else
lt_cv_nm_interface="BSD nm"
echo "int some_variable = 0;" > conftest.$ac_ext
- (eval echo "\"\$as_me:6261: $ac_compile\"" >&5)
+ (eval echo "\"\$as_me:6284: $ac_compile\"" >&5)
(eval "$ac_compile" 2>conftest.err)
cat conftest.err >&5
- (eval echo "\"\$as_me:6264: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
+ (eval echo "\"\$as_me:6287: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
(eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
cat conftest.err >&5
- (eval echo "\"\$as_me:6267: output\"" >&5)
+ (eval echo "\"\$as_me:6290: output\"" >&5)
cat conftest.out >&5
if $GREP 'External.*some_variable' conftest.out > /dev/null; then
lt_cv_nm_interface="MS dumpbin"
@@ -7469,7 +7492,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 7472 "configure"' > conftest.$ac_ext
+ echo '#line 7495 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -9326,11 +9349,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:9329: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:9352: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:9333: \$? = $ac_status" >&5
+ echo "$as_me:9356: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -9665,11 +9688,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:9668: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:9691: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:9672: \$? = $ac_status" >&5
+ echo "$as_me:9695: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -9770,11 +9793,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:9773: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:9796: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:9777: \$? = $ac_status" >&5
+ echo "$as_me:9800: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -9825,11 +9848,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:9828: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:9851: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:9832: \$? = $ac_status" >&5
+ echo "$as_me:9855: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -12628,7 +12651,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 12631 "configure"
+#line 12654 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -12724,7 +12747,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 12727 "configure"
+#line 12750 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -13038,6 +13061,44 @@ fi
#fi
#AC_SUBST(POPT_LIBS)
+
+{ $as_echo "$as_me:$LINENO: checking whether to use MPI_File for input" >&5
+$as_echo_n "checking whether to use MPI_File for input... " >&6; }
+# Check whether --enable-mpi-file-input was given.
+if test "${enable_mpi_file_input+set}" = set; then
+ enableval=$enable_mpi_file_input; answer="$enableval"
+else
+ answer=""
+fi
+
+case x"$answer" in
+ xyes)
+ { $as_echo "$as_me:$LINENO: result: enabled" >&5
+$as_echo "enabled" >&6; }
+
+cat >>confdefs.h <<\_ACEOF
+#define USE_MPI_FILE_INPUT 1
+_ACEOF
+
+ ;;
+
+ xno)
+ { $as_echo "$as_me:$LINENO: result: disabled" >&5
+$as_echo "disabled" >&6; }
+ ;;
+
+ x)
+ { $as_echo "$as_me:$LINENO: result: not specified so enabled by default" >&5
+$as_echo "not specified so enabled by default" >&6; }
+
+cat >>confdefs.h <<\_ACEOF
+#define USE_MPI_FILE_INPUT 1
+_ACEOF
+
+ ;;
+esac
+
+
# In call to ACX_PTHREAD, don't provide ACTION-IF-FOUND; otherwise,
# HAVE_PTHREAD won't be defined
{ $as_echo "$as_me:$LINENO: checking for pthreads feature" >&5
@@ -18747,7 +18808,15 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
-for ac_func in ceil floor index log madvise memcpy memmove memset munmap pow rint stat64 strtoul sysconf sysctl sigaction
+
+
+
+
+
+
+
+for ac_func in ceil floor index log madvise memcpy memmove memset munmap pow rint stat64 strtoul sysconf sysctl sigaction \
+ shmget shmctl shmat shmdt semget semctl semop
do
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -25931,46 +26000,12 @@ esac
-# Goby package
-
-{ $as_echo "$as_me:$LINENO: checking for goby library" >&5
-$as_echo_n "checking for goby library... " >&6; }
-
-# Check whether --with-goby was given.
-if test "${with_goby+set}" = set; then
- withval=$with_goby; answer="$withval"
-else
- answer=""
-fi
-
-if test x"$answer" = x; then
- GOBY_CFLAGS=""
- GOBY_LDFLAGS=""
- GOBY_LIBS=""
- { $as_echo "$as_me:$LINENO: result: disabled" >&5
-$as_echo "disabled" >&6; }
-else
-
-cat >>confdefs.h <<\_ACEOF
-#define HAVE_GOBY 1
-_ACEOF
-
- GOBY_CFLAGS="-I$withval/include"
- GOBY_LDFLAGS="-L$withval/lib"
- GOBY_LIBS="-lgoby"
- { $as_echo "$as_me:$LINENO: result: enabled" >&5
-$as_echo "enabled" >&6; }
-fi
-
-
-
-
-
-
ac_config_files="$ac_config_files Makefile"
ac_config_files="$ac_config_files src/Makefile"
+ac_config_files="$ac_config_files mpi/Makefile"
+
ac_config_files="$ac_config_files util/Makefile"
ac_config_files="$ac_config_files util/gmap_compress.pl"
@@ -26157,6 +26192,13 @@ $as_echo "$as_me: error: conditional \"MAINTAINER\" was never defined.
Usually this means the macro was only invoked conditionally." >&2;}
{ (exit 1); exit 1; }; }
fi
+if test -z "${MPI_FOUND_TRUE}" && test -z "${MPI_FOUND_FALSE}"; then
+ { { $as_echo "$as_me:$LINENO: error: conditional \"MPI_FOUND\" was never defined.
+Usually this means the macro was only invoked conditionally." >&5
+$as_echo "$as_me: error: conditional \"MPI_FOUND\" was never defined.
+Usually this means the macro was only invoked conditionally." >&2;}
+ { (exit 1); exit 1; }; }
+fi
: ${CONFIG_STATUS=./config.status}
@@ -26480,7 +26522,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2014-12-29, which was
+This file was extended by gmap $as_me 2015-06-10, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -26543,7 +26585,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2014-12-29
+gmap config.status 2015-06-10
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
@@ -26926,6 +26968,7 @@ do
"libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
+ "mpi/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/Makefile" ;;
"util/Makefile") CONFIG_FILES="$CONFIG_FILES util/Makefile" ;;
"util/gmap_compress.pl") CONFIG_FILES="$CONFIG_FILES util/gmap_compress.pl" ;;
"util/gmap_uncompress.pl") CONFIG_FILES="$CONFIG_FILES util/gmap_uncompress.pl" ;;
@@ -28550,10 +28593,24 @@ rebuild your compiler and linker.
" >&2;}
fi
-{ $as_echo "$as_me:$LINENO: checking compiler and flags to be used" >&5
-$as_echo_n "checking compiler and flags to be used... " >&6; }
+{ $as_echo "$as_me:$LINENO: checking Standard compiler and flags to be used" >&5
+$as_echo_n "checking Standard compiler and flags to be used... " >&6; }
{ $as_echo "$as_me:$LINENO: result: $CC $CFLAGS" >&5
$as_echo "$CC $CFLAGS" >&6; }
+{ $as_echo "$as_me:$LINENO: checking Standard linker flags to be used" >&5
+$as_echo_n "checking Standard linker flags to be used... " >&6; }
+{ $as_echo "$as_me:$LINENO: result: $LD_FLAGS" >&5
+$as_echo "$LD_FLAGS" >&6; }
+
+{ $as_echo "$as_me:$LINENO: checking MPI compiler and flags to be used" >&5
+$as_echo_n "checking MPI compiler and flags to be used... " >&6; }
+{ $as_echo "$as_me:$LINENO: result: $MPICC $MPI_CFLAGS" >&5
+$as_echo "$MPICC $MPI_CFLAGS" >&6; }
+{ $as_echo "$as_me:$LINENO: checking MPI linker flags to be used" >&5
+$as_echo_n "checking MPI linker flags to be used... " >&6; }
+{ $as_echo "$as_me:$LINENO: result: $MPI_CLDFLAGS" >&5
+$as_echo "$MPI_CLDFLAGS" >&6; }
+
{ $as_echo "$as_me:$LINENO: checking pthread compiler flags to be used" >&5
$as_echo_n "checking pthread compiler flags to be used... " >&6; }
{ $as_echo "$as_me:$LINENO: result: $PTHREAD_CFLAGS" >&5
@@ -28562,6 +28619,7 @@ $as_echo "$PTHREAD_CFLAGS" >&6; }
$as_echo_n "checking popcnt compiler flags to be used... " >&6; }
{ $as_echo "$as_me:$LINENO: result: $POPCNT_CFLAGS" >&5
$as_echo "$POPCNT_CFLAGS" >&6; }
+
{ $as_echo "$as_me:$LINENO: checking SIMD features available on computer" >&5
$as_echo_n "checking SIMD features available on computer... " >&6; }
{ $as_echo "$as_me:$LINENO: result: $ax_cv_cpu_features" >&5
diff --git a/configure.ac b/configure.ac
index 9a5b0ed..44b328d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -73,6 +73,10 @@ else
AC_MSG_RESULT($CFLAGS)
fi
+AC_MSG_CHECKING(MPI_CFLAGS)
+AC_ARG_VAR([MPI_CFLAGS], [Compiler flags (default: -O3)])
+AC_MSG_RESULT($MPI_CFLAGS)
+AC_SUBST(MPI_CFLAGS)
AC_CONFIG_SRCDIR([src/gmap.c])
@@ -87,7 +91,8 @@ AC_SYS_LARGEFILE
AC_ARG_PROGRAM
#AM_INIT_AUTOMAKE([no-dependencies])
-AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
+#AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
+AM_INIT_AUTOMAKE
AM_CONDITIONAL(FULLDIST,test "x$enable_fulldist" = xyes)
AC_ARG_ENABLE([fulldist],
@@ -140,7 +145,11 @@ ACX_PATH_PERL
AC_PROG_CC
AM_PROG_CC_C_O
+
AX_MPI # Sets MPICC to use for isolated source files that need it
+# AC_PROG_CC_MPI # This sets CC to mpicc
+
+AM_CONDITIONAL(MPI_FOUND,test "x$MPILIBS" != x)
AC_PROG_LIBTOOL
@@ -156,6 +165,30 @@ AC_CHECK_LIB(m, rint)
#fi
#AC_SUBST(POPT_LIBS)
+
+AC_MSG_CHECKING(whether to use MPI_File for input)
+AC_ARG_ENABLE([mpi-file-input],
+ AC_HELP_STRING([--enable-mpi-file-input],
+ [Enable MPI_File for input for MPI versions (default=yes). Note: Use only if you have a parallel I/O file system, like Lustre]),
+ [answer="$enableval"],
+ [answer=""])
+case x"$answer" in
+ xyes)
+ AC_MSG_RESULT(enabled)
+ AC_DEFINE(USE_MPI_FILE_INPUT,1,[Define to 1 if you want to use MPI_File for input.])
+ ;;
+
+ xno)
+ AC_MSG_RESULT(disabled)
+ ;;
+
+ x)
+ AC_MSG_RESULT([not specified so enabled by default])
+ AC_DEFINE(USE_MPI_FILE_INPUT,1,[Define to 1 if you want to use MPI_File for input.])
+ ;;
+esac
+
+
# In call to ACX_PTHREAD, don't provide ACTION-IF-FOUND; otherwise,
# HAVE_PTHREAD won't be defined
AC_MSG_CHECKING(for pthreads feature)
@@ -266,7 +299,8 @@ fi
ACX_MMAP_FLAGS
ACX_MADVISE_FLAGS
-AC_CHECK_FUNCS([ceil floor index log madvise memcpy memmove memset munmap pow rint stat64 strtoul sysconf sysctl sigaction])
+AC_CHECK_FUNCS([ceil floor index log madvise memcpy memmove memset munmap pow rint stat64 strtoul sysconf sysctl sigaction \
+ shmget shmctl shmat shmdt semget semctl semop])
ACX_STRUCT_STAT64
ACX_PAGESIZE
@@ -616,34 +650,9 @@ esac
AC_SUBST(BZLIB_LIBS)
-# Goby package
-
-AC_MSG_CHECKING(for goby library)
-AC_ARG_WITH([goby],
- AC_HELP_STRING([--with-goby=DIR],
- [Location of Goby header files (in DIR/include) and library files (in DIR/lib) (optional)]),
- [answer="$withval"],
- [answer=""])
-if test x"$answer" = x; then
- GOBY_CFLAGS=""
- GOBY_LDFLAGS=""
- GOBY_LIBS=""
- AC_MSG_RESULT(disabled)
-else
- AC_DEFINE(HAVE_GOBY,1,[Define to 1 if you have a working Goby library.])
- GOBY_CFLAGS="-I$withval/include"
- GOBY_LDFLAGS="-L$withval/lib"
- GOBY_LIBS="-lgoby"
- AC_MSG_RESULT(enabled)
-fi
-AC_SUBST(GOBY_CFLAGS)
-AC_SUBST(GOBY_LDFLAGS)
-AC_SUBST(GOBY_LIBS)
-
-
-
AC_CONFIG_FILES([Makefile])
AC_CONFIG_FILES([src/Makefile])
+AC_CONFIG_FILES([mpi/Makefile])
AC_CONFIG_FILES([util/Makefile])
AC_CONFIG_FILES([util/gmap_compress.pl])
AC_CONFIG_FILES([util/gmap_uncompress.pl])
@@ -747,12 +756,21 @@ rebuild your compiler and linker.
])
fi
-AC_MSG_CHECKING(compiler and flags to be used)
+AC_MSG_CHECKING(Standard compiler and flags to be used)
AC_MSG_RESULT($CC $CFLAGS)
+AC_MSG_CHECKING(Standard linker flags to be used)
+AC_MSG_RESULT($LD_FLAGS)
+
+AC_MSG_CHECKING(MPI compiler and flags to be used)
+AC_MSG_RESULT($MPICC $MPI_CFLAGS)
+AC_MSG_CHECKING(MPI linker flags to be used)
+AC_MSG_RESULT($MPI_CLDFLAGS)
+
AC_MSG_CHECKING(pthread compiler flags to be used)
AC_MSG_RESULT($PTHREAD_CFLAGS)
AC_MSG_CHECKING(popcnt compiler flags to be used)
AC_MSG_RESULT($POPCNT_CFLAGS)
+
AC_MSG_CHECKING(SIMD features available on computer)
AC_MSG_RESULT($ax_cv_cpu_features)
AC_MSG_CHECKING(SIMD compiler flags to be used)
diff --git a/mpi/Makefile.am b/mpi/Makefile.am
new file mode 100644
index 0000000..533cb61
--- /dev/null
+++ b/mpi/Makefile.am
@@ -0,0 +1,111 @@
+
+
+CC = $(MPICC)
+srcdir = $(top_srcdir)/src
+
+# This is a configure-time conditional
+if MPI_FOUND
+bin_PROGRAMS = mpi_gsnap
+endif
+
+
+MPI_GSNAP_FILES = $(srcdir)/mpidebug.c $(srcdir)/mpidebug.h \
+ $(srcdir)/fopen.h $(srcdir)/bool.h $(srcdir)/types.h $(srcdir)/separator.h $(srcdir)/comp.h \
+ $(srcdir)/except.c $(srcdir)/except.h $(srcdir)/assert.c $(srcdir)/assert.h $(srcdir)/mem.c $(srcdir)/mem.h \
+ $(srcdir)/intlistdef.h $(srcdir)/intlist.c $(srcdir)/intlist.h $(srcdir)/listdef.h $(srcdir)/list.c $(srcdir)/list.h \
+ $(srcdir)/littleendian.c $(srcdir)/littleendian.h $(srcdir)/bigendian.c $(srcdir)/bigendian.h \
+ $(srcdir)/univinterval.c $(srcdir)/univinterval.h $(srcdir)/interval.c $(srcdir)/interval.h \
+ $(srcdir)/uintlist.c $(srcdir)/uintlist.h \
+ $(srcdir)/stopwatch.c $(srcdir)/stopwatch.h $(srcdir)/access.c $(srcdir)/access.h \
+ $(srcdir)/filestring.c $(srcdir)/filestring.h \
+ $(srcdir)/iit-read-univ.c $(srcdir)/iit-read-univ.h $(srcdir)/iitdef.h $(srcdir)/iit-read.c $(srcdir)/iit-read.h \
+ $(srcdir)/md5.c $(srcdir)/md5.h $(srcdir)/complement.h $(srcdir)/bzip2.c $(srcdir)/bzip2.h $(srcdir)/sequence.c $(srcdir)/sequence.h $(srcdir)/reader.c $(srcdir)/reader.h \
+ $(srcdir)/genomicpos.c $(srcdir)/genomicpos.h $(srcdir)/compress.c $(srcdir)/compress.h \
+ $(srcdir)/genome.c $(srcdir)/genome.h \
+ $(srcdir)/popcount.c $(srcdir)/popcount.h $(srcdir)/genome128_hr.c $(srcdir)/genome128_hr.h $(srcdir)/genome_sites.c $(srcdir)/genome_sites.h \
+ $(srcdir)/bitpack64-read.c $(srcdir)/bitpack64-read.h $(srcdir)/bitpack64-readtwo.c $(srcdir)/bitpack64-readtwo.h \
+ $(srcdir)/indexdbdef.h $(srcdir)/indexdb.c $(srcdir)/indexdb.h $(srcdir)/indexdb_hr.c $(srcdir)/indexdb_hr.h \
+ $(srcdir)/oligo.c $(srcdir)/oligo.h \
+ $(srcdir)/chrom.c $(srcdir)/chrom.h $(srcdir)/segmentpos.c $(srcdir)/segmentpos.h \
+ $(srcdir)/chrnum.c $(srcdir)/chrnum.h \
+ $(srcdir)/maxent_hr.c $(srcdir)/maxent_hr.h $(srcdir)/samflags.h $(srcdir)/samprint.c $(srcdir)/samprint.h \
+ $(srcdir)/mapq.c $(srcdir)/mapq.h $(srcdir)/shortread.c $(srcdir)/shortread.h $(srcdir)/substring.c $(srcdir)/substring.h $(srcdir)/junction.c $(srcdir)/junction.h $(srcdir)/stage3hr.c $(srcdir)/stage3hr.h \
+ $(srcdir)/spanningelt.c $(srcdir)/spanningelt.h $(srcdir)/cmet.c $(srcdir)/cmet.h $(srcdir)/atoi.c $(srcdir)/atoi.h \
+ $(srcdir)/comp.h $(srcdir)/maxent.c $(srcdir)/maxent.h $(srcdir)/pairdef.h $(srcdir)/pair.c $(srcdir)/pair.h $(srcdir)/pairpool.c $(srcdir)/pairpool.h $(srcdir)/diag.c $(srcdir)/diag.h $(srcdir)/diagpool.c $(srcdir)/diagpool.h \
+ $(srcdir)/orderstat.c $(srcdir)/orderstat.h $(srcdir)/oligoindex_hr.c $(srcdir)/oligoindex_hr.h $(srcdir)/cellpool.c $(srcdir)/cellpool.h $(srcdir)/stage2.c $(srcdir)/stage2.h \
+ $(srcdir)/intron.c $(srcdir)/intron.h $(srcdir)/boyer-moore.c $(srcdir)/boyer-moore.h $(srcdir)/changepoint.c $(srcdir)/changepoint.h $(srcdir)/pbinom.c $(srcdir)/pbinom.h \
+ $(srcdir)/dynprog.c $(srcdir)/dynprog.h $(srcdir)/dynprog_simd.c $(srcdir)/dynprog_simd.h \
+ $(srcdir)/dynprog_single.c $(srcdir)/dynprog_single.h $(srcdir)/dynprog_genome.c $(srcdir)/dynprog_genome.h $(srcdir)/dynprog_cdna.c $(srcdir)/dynprog_cdna.h $(srcdir)/dynprog_end.c $(srcdir)/dynprog_end.h \
+ $(srcdir)/gbuffer.c $(srcdir)/gbuffer.h $(srcdir)/translation.c $(srcdir)/translation.h \
+ $(srcdir)/doublelist.c $(srcdir)/doublelist.h $(srcdir)/smooth.c $(srcdir)/smooth.h \
+ $(srcdir)/chimera.c $(srcdir)/chimera.h $(srcdir)/sense.h $(srcdir)/fastlog.h $(srcdir)/stage3.c $(srcdir)/stage3.h \
+ $(srcdir)/splicestringpool.c $(srcdir)/splicestringpool.h $(srcdir)/splicetrie_build.c $(srcdir)/splicetrie_build.h $(srcdir)/splicetrie.c $(srcdir)/splicetrie.h \
+ $(srcdir)/splice.c $(srcdir)/splice.h $(srcdir)/indel.c $(srcdir)/indel.h $(srcdir)/bitpack64-access.c $(srcdir)/bitpack64-access.h \
+ $(srcdir)/bytecoding.c $(srcdir)/bytecoding.h $(srcdir)/univdiagdef.h $(srcdir)/univdiag.c $(srcdir)/univdiag.h $(srcdir)/sarray-read.c $(srcdir)/sarray-read.h \
+ $(srcdir)/stage1hr.c $(srcdir)/stage1hr.h \
+ $(srcdir)/request.c $(srcdir)/request.h $(srcdir)/resulthr.c $(srcdir)/resulthr.h $(srcdir)/output.c $(srcdir)/output.h \
+ $(srcdir)/master.c $(srcdir)/master.h \
+ $(srcdir)/inbuffer.c $(srcdir)/inbuffer.h $(srcdir)/samheader.c $(srcdir)/samheader.h $(srcdir)/outbuffer.c $(srcdir)/outbuffer.h \
+ $(srcdir)/datadir.c $(srcdir)/datadir.h $(srcdir)/mode.h \
+ $(srcdir)/getopt.c $(srcdir)/getopt1.c $(srcdir)/getopt.h $(srcdir)/gsnap.c
+
+
+# Note: dist_ commands get read by bootstrap, and don't follow the flags
+
+mpi_gsnap_CC = $(MPICC)
+mpi_gsnap_CFLAGS = $(MPI_CFLAGS) $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DUSE_MPI=1
+mpi_gsnap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+mpi_gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+
+# Need nodist_ to prevent sources from being copied from src/ subdirectory
+nodist_mpi_gsnap_SOURCES = $(MPI_GSNAP_FILES)
+
+
+MPI_GMAP_FILES = $(srcdir)/mpidebug.c $(srcdir)/mpidebug.h \
+ $(srcdir)/= $(srcdir)/fopen.h $(srcdir)/bool.h $(srcdir)/types.h $(srcdir)/separator.h $(srcdir)/comp.h \
+ $(srcdir)/except.c $(srcdir)/except.h $(srcdir)/assert.c $(srcdir)/assert.h $(srcdir)/mem.c $(srcdir)/mem.h \
+ $(srcdir)/intlistdef.h $(srcdir)/intlist.c $(srcdir)/intlist.h $(srcdir)/listdef.h $(srcdir)/list.c $(srcdir)/list.h \
+ $(srcdir)/littleendian.c $(srcdir)/littleendian.h $(srcdir)/bigendian.c $(srcdir)/bigendian.h \
+ $(srcdir)/univinterval.c $(srcdir)/univinterval.h $(srcdir)/interval.c $(srcdir)/interval.h $(srcdir)/uintlist.c $(srcdir)/uintlist.h \
+ $(srcdir)/stopwatch.c $(srcdir)/stopwatch.h $(srcdir)/access.c $(srcdir)/access.h \
+ $(srcdir)/filestring.c $(srcdir)/filestring.h \
+ $(srcdir)/iit-read-univ.c $(srcdir)/iit-read-univ.h $(srcdir)/iitdef.h $(srcdir)/iit-read.c $(srcdir)/iit-read.h \
+ $(srcdir)/md5.c $(srcdir)/md5.h $(srcdir)/complement.h $(srcdir)/bzip2.c $(srcdir)/bzip2.h $(srcdir)/sequence.c $(srcdir)/sequence.h $(srcdir)/reader.c $(srcdir)/reader.h \
+ $(srcdir)/genomicpos.c $(srcdir)/genomicpos.h $(srcdir)/compress.c $(srcdir)/compress.h $(srcdir)/compress-write.c $(srcdir)/compress-write.h \
+ $(srcdir)/gbuffer.c $(srcdir)/gbuffer.h $(srcdir)/genome.c $(srcdir)/genome.h \
+ $(srcdir)/popcount.c $(srcdir)/popcount.h $(srcdir)/genome128_hr.c $(srcdir)/genome128_hr.h $(srcdir)/genome_sites.c $(srcdir)/genome_sites.h \
+ $(srcdir)/genome-write.c $(srcdir)/genome-write.h \
+ $(srcdir)/bitpack64-read.c $(srcdir)/bitpack64-read.h $(srcdir)/bitpack64-readtwo.c $(srcdir)/bitpack64-readtwo.h \
+ $(srcdir)/indexdbdef.h $(srcdir)/indexdb.c $(srcdir)/indexdb.h $(srcdir)/indexdb_hr.c $(srcdir)/indexdb_hr.h \
+ $(srcdir)/oligo.c $(srcdir)/oligo.h $(srcdir)/block.c $(srcdir)/block.h \
+ $(srcdir)/chrom.c $(srcdir)/chrom.h $(srcdir)/segmentpos.c $(srcdir)/segmentpos.h \
+ $(srcdir)/chrnum.c $(srcdir)/chrnum.h $(srcdir)/uinttable.c $(srcdir)/uinttable.h $(srcdir)/gregion.c $(srcdir)/gregion.h \
+ $(srcdir)/matchdef.h $(srcdir)/match.c $(srcdir)/match.h $(srcdir)/matchpool.c $(srcdir)/matchpool.h \
+ $(srcdir)/diagnostic.c $(srcdir)/diagnostic.h $(srcdir)/stage1.c $(srcdir)/stage1.h \
+ $(srcdir)/diagdef.h $(srcdir)/diag.c $(srcdir)/diag.h $(srcdir)/diagpool.c $(srcdir)/diagpool.h \
+ $(srcdir)/cmet.c $(srcdir)/cmet.h $(srcdir)/atoi.c $(srcdir)/atoi.h \
+ $(srcdir)/orderstat.c $(srcdir)/orderstat.h $(srcdir)/oligoindex_hr.c $(srcdir)/oligoindex_hr.h \
+ $(srcdir)/scores.h $(srcdir)/intron.c $(srcdir)/intron.h $(srcdir)/maxent.c $(srcdir)/maxent.h $(srcdir)/maxent_hr.c $(srcdir)/maxent_hr.h $(srcdir)/samflags.h $(srcdir)/pairdef.h $(srcdir)/pair.c $(srcdir)/pair.h \
+ $(srcdir)/pairpool.c $(srcdir)/pairpool.h $(srcdir)/cellpool.c $(srcdir)/cellpool.h $(srcdir)/stage2.c $(srcdir)/stage2.h \
+ $(srcdir)/doublelist.c $(srcdir)/doublelist.h $(srcdir)/smooth.c $(srcdir)/smooth.h \
+ $(srcdir)/splicestringpool.c $(srcdir)/splicestringpool.h $(srcdir)/splicetrie_build.c $(srcdir)/splicetrie_build.h $(srcdir)/splicetrie.c $(srcdir)/splicetrie.h \
+ $(srcdir)/boyer-moore.c $(srcdir)/boyer-moore.h \
+ $(srcdir)/dynprog.c $(srcdir)/dynprog.h $(srcdir)/dynprog_simd.c $(srcdir)/dynprog_simd.h \
+ $(srcdir)/dynprog_single.c $(srcdir)/dynprog_single.h $(srcdir)/dynprog_genome.c $(srcdir)/dynprog_genome.h $(srcdir)/dynprog_cdna.c $(srcdir)/dynprog_cdna.h $(srcdir)/dynprog_end.c $(srcdir)/dynprog_end.h \
+ $(srcdir)/translation.c $(srcdir)/translation.h \
+ $(srcdir)/pbinom.c $(srcdir)/pbinom.h $(srcdir)/changepoint.c $(srcdir)/changepoint.h $(srcdir)/sense.h $(srcdir)/fastlog.h $(srcdir)/stage3.c $(srcdir)/stage3.h \
+ $(srcdir)/request.c $(srcdir)/request.h $(srcdir)/result.c $(srcdir)/result.h $(srcdir)/output.c $(srcdir)/output.h \
+ $(srcdir)/inbuffer.c $(srcdir)/inbuffer.h $(srcdir)/samheader.c $(srcdir)/samheader.h $(srcdir)/outbuffer.c $(srcdir)/outbuffer.h \
+ $(srcdir)/chimera.c $(srcdir)/chimera.h $(srcdir)/datadir.c $(srcdir)/datadir.h \
+ $(srcdir)/getopt.c $(srcdir)/getopt1.c $(srcdir)/getopt.h $(srcdir)/gmap.c
+
+# Note: dist_ commands get read by bootstrap, and don't follow the flags
+
+mpi_gmap_CC = $(MPICC)
+mpi_gmap_CFLAGS = $(MPI_CFLAGS) $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DUSE_MPI=1
+mpi_gmap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+mpi_gmap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+
+nodist_mpi_gmap_SOURCES = $(MPI_GMAP_FILES)
+
+
diff --git a/mpi/Makefile.in b/mpi/Makefile.in
new file mode 100644
index 0000000..941269f
--- /dev/null
+++ b/mpi/Makefile.in
@@ -0,0 +1,2041 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@MPI_FOUND_TRUE@bin_PROGRAMS = mpi_gsnap$(EXEEXT)
+subdir = mpi
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
+ $(top_srcdir)/config/ltoptions.m4 \
+ $(top_srcdir)/config/ltsugar.m4 \
+ $(top_srcdir)/config/ltversion.m4 \
+ $(top_srcdir)/config/lt~obsolete.m4 $(top_srcdir)/acinclude.m4 \
+ $(top_srcdir)/config/pagesize.m4 \
+ $(top_srcdir)/config/madvise-flags.m4 \
+ $(top_srcdir)/config/mmap-flags.m4 \
+ $(top_srcdir)/config/acx_mmap_fixed.m4 \
+ $(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/ax_mpi.m4 \
+ $(top_srcdir)/config/acx_pthread.m4 \
+ $(top_srcdir)/config/builtin-popcount.m4 \
+ $(top_srcdir)/config/struct-stat64.m4 \
+ $(top_srcdir)/config/expand.m4 $(top_srcdir)/config/perl.m4 \
+ $(top_srcdir)/config/fopen.m4 $(top_srcdir)/config/asm-bsr.m4 \
+ $(top_srcdir)/config/sse2_shift_defect.m4 \
+ $(top_srcdir)/config/ax_gcc_x86_cpuid.m4 \
+ $(top_srcdir)/config/ax_gcc_x86_avx_xgetbv.m4 \
+ $(top_srcdir)/config/ax_check_compile_flag.m4 \
+ $(top_srcdir)/config/ax_ext.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/src/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__objects_1 = mpi_gsnap-mpidebug.$(OBJEXT) \
+ mpi_gsnap-except.$(OBJEXT) mpi_gsnap-assert.$(OBJEXT) \
+ mpi_gsnap-mem.$(OBJEXT) mpi_gsnap-intlist.$(OBJEXT) \
+ mpi_gsnap-list.$(OBJEXT) mpi_gsnap-littleendian.$(OBJEXT) \
+ mpi_gsnap-bigendian.$(OBJEXT) mpi_gsnap-univinterval.$(OBJEXT) \
+ mpi_gsnap-interval.$(OBJEXT) mpi_gsnap-uintlist.$(OBJEXT) \
+ mpi_gsnap-stopwatch.$(OBJEXT) mpi_gsnap-access.$(OBJEXT) \
+ mpi_gsnap-filestring.$(OBJEXT) \
+ mpi_gsnap-iit-read-univ.$(OBJEXT) mpi_gsnap-iit-read.$(OBJEXT) \
+ mpi_gsnap-md5.$(OBJEXT) mpi_gsnap-bzip2.$(OBJEXT) \
+ mpi_gsnap-sequence.$(OBJEXT) mpi_gsnap-reader.$(OBJEXT) \
+ mpi_gsnap-genomicpos.$(OBJEXT) mpi_gsnap-compress.$(OBJEXT) \
+ mpi_gsnap-genome.$(OBJEXT) mpi_gsnap-popcount.$(OBJEXT) \
+ mpi_gsnap-genome128_hr.$(OBJEXT) \
+ mpi_gsnap-genome_sites.$(OBJEXT) \
+ mpi_gsnap-bitpack64-read.$(OBJEXT) \
+ mpi_gsnap-bitpack64-readtwo.$(OBJEXT) \
+ mpi_gsnap-indexdb.$(OBJEXT) mpi_gsnap-indexdb_hr.$(OBJEXT) \
+ mpi_gsnap-oligo.$(OBJEXT) mpi_gsnap-chrom.$(OBJEXT) \
+ mpi_gsnap-segmentpos.$(OBJEXT) mpi_gsnap-chrnum.$(OBJEXT) \
+ mpi_gsnap-maxent_hr.$(OBJEXT) mpi_gsnap-samprint.$(OBJEXT) \
+ mpi_gsnap-mapq.$(OBJEXT) mpi_gsnap-shortread.$(OBJEXT) \
+ mpi_gsnap-substring.$(OBJEXT) mpi_gsnap-junction.$(OBJEXT) \
+ mpi_gsnap-stage3hr.$(OBJEXT) mpi_gsnap-spanningelt.$(OBJEXT) \
+ mpi_gsnap-cmet.$(OBJEXT) mpi_gsnap-atoi.$(OBJEXT) \
+ mpi_gsnap-maxent.$(OBJEXT) mpi_gsnap-pair.$(OBJEXT) \
+ mpi_gsnap-pairpool.$(OBJEXT) mpi_gsnap-diag.$(OBJEXT) \
+ mpi_gsnap-diagpool.$(OBJEXT) mpi_gsnap-orderstat.$(OBJEXT) \
+ mpi_gsnap-oligoindex_hr.$(OBJEXT) mpi_gsnap-cellpool.$(OBJEXT) \
+ mpi_gsnap-stage2.$(OBJEXT) mpi_gsnap-intron.$(OBJEXT) \
+ mpi_gsnap-boyer-moore.$(OBJEXT) \
+ mpi_gsnap-changepoint.$(OBJEXT) mpi_gsnap-pbinom.$(OBJEXT) \
+ mpi_gsnap-dynprog.$(OBJEXT) mpi_gsnap-dynprog_simd.$(OBJEXT) \
+ mpi_gsnap-dynprog_single.$(OBJEXT) \
+ mpi_gsnap-dynprog_genome.$(OBJEXT) \
+ mpi_gsnap-dynprog_cdna.$(OBJEXT) \
+ mpi_gsnap-dynprog_end.$(OBJEXT) mpi_gsnap-gbuffer.$(OBJEXT) \
+ mpi_gsnap-translation.$(OBJEXT) mpi_gsnap-doublelist.$(OBJEXT) \
+ mpi_gsnap-smooth.$(OBJEXT) mpi_gsnap-chimera.$(OBJEXT) \
+ mpi_gsnap-stage3.$(OBJEXT) \
+ mpi_gsnap-splicestringpool.$(OBJEXT) \
+ mpi_gsnap-splicetrie_build.$(OBJEXT) \
+ mpi_gsnap-splicetrie.$(OBJEXT) mpi_gsnap-splice.$(OBJEXT) \
+ mpi_gsnap-indel.$(OBJEXT) mpi_gsnap-bitpack64-access.$(OBJEXT) \
+ mpi_gsnap-bytecoding.$(OBJEXT) mpi_gsnap-univdiag.$(OBJEXT) \
+ mpi_gsnap-sarray-read.$(OBJEXT) mpi_gsnap-stage1hr.$(OBJEXT) \
+ mpi_gsnap-request.$(OBJEXT) mpi_gsnap-resulthr.$(OBJEXT) \
+ mpi_gsnap-output.$(OBJEXT) mpi_gsnap-master.$(OBJEXT) \
+ mpi_gsnap-inbuffer.$(OBJEXT) mpi_gsnap-samheader.$(OBJEXT) \
+ mpi_gsnap-outbuffer.$(OBJEXT) mpi_gsnap-datadir.$(OBJEXT) \
+ mpi_gsnap-getopt.$(OBJEXT) mpi_gsnap-getopt1.$(OBJEXT) \
+ mpi_gsnap-gsnap.$(OBJEXT)
+nodist_mpi_gsnap_OBJECTS = $(am__objects_1)
+mpi_gsnap_OBJECTS = $(nodist_mpi_gsnap_OBJECTS)
+am__DEPENDENCIES_1 =
+mpi_gsnap_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
+ $(am__DEPENDENCIES_1)
+mpi_gsnap_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(mpi_gsnap_CFLAGS) \
+ $(CFLAGS) $(mpi_gsnap_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src
+depcomp = $(SHELL) $(top_srcdir)/config/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(nodist_mpi_gsnap_SOURCES)
+DIST_SOURCES =
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BINDIR = @BINDIR@
+BZLIB_LIBS = @BZLIB_LIBS@
+CC = $(MPICC)
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GMAPDB = @GMAPDB@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MAX_READLENGTH = @MAX_READLENGTH@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+MPILIBS = @MPILIBS@
+MPI_CFLAGS = @MPI_CFLAGS@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+POPCNT_CFLAGS = @POPCNT_CFLAGS@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SIMD_CFLAGS = @SIMD_CFLAGS@
+STRIP = @STRIP@
+VERSION = @VERSION@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = $(top_srcdir)/src
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+MPI_GSNAP_FILES = $(srcdir)/mpidebug.c $(srcdir)/mpidebug.h \
+ $(srcdir)/fopen.h $(srcdir)/bool.h $(srcdir)/types.h $(srcdir)/separator.h $(srcdir)/comp.h \
+ $(srcdir)/except.c $(srcdir)/except.h $(srcdir)/assert.c $(srcdir)/assert.h $(srcdir)/mem.c $(srcdir)/mem.h \
+ $(srcdir)/intlistdef.h $(srcdir)/intlist.c $(srcdir)/intlist.h $(srcdir)/listdef.h $(srcdir)/list.c $(srcdir)/list.h \
+ $(srcdir)/littleendian.c $(srcdir)/littleendian.h $(srcdir)/bigendian.c $(srcdir)/bigendian.h \
+ $(srcdir)/univinterval.c $(srcdir)/univinterval.h $(srcdir)/interval.c $(srcdir)/interval.h \
+ $(srcdir)/uintlist.c $(srcdir)/uintlist.h \
+ $(srcdir)/stopwatch.c $(srcdir)/stopwatch.h $(srcdir)/access.c $(srcdir)/access.h \
+ $(srcdir)/filestring.c $(srcdir)/filestring.h \
+ $(srcdir)/iit-read-univ.c $(srcdir)/iit-read-univ.h $(srcdir)/iitdef.h $(srcdir)/iit-read.c $(srcdir)/iit-read.h \
+ $(srcdir)/md5.c $(srcdir)/md5.h $(srcdir)/complement.h $(srcdir)/bzip2.c $(srcdir)/bzip2.h $(srcdir)/sequence.c $(srcdir)/sequence.h $(srcdir)/reader.c $(srcdir)/reader.h \
+ $(srcdir)/genomicpos.c $(srcdir)/genomicpos.h $(srcdir)/compress.c $(srcdir)/compress.h \
+ $(srcdir)/genome.c $(srcdir)/genome.h \
+ $(srcdir)/popcount.c $(srcdir)/popcount.h $(srcdir)/genome128_hr.c $(srcdir)/genome128_hr.h $(srcdir)/genome_sites.c $(srcdir)/genome_sites.h \
+ $(srcdir)/bitpack64-read.c $(srcdir)/bitpack64-read.h $(srcdir)/bitpack64-readtwo.c $(srcdir)/bitpack64-readtwo.h \
+ $(srcdir)/indexdbdef.h $(srcdir)/indexdb.c $(srcdir)/indexdb.h $(srcdir)/indexdb_hr.c $(srcdir)/indexdb_hr.h \
+ $(srcdir)/oligo.c $(srcdir)/oligo.h \
+ $(srcdir)/chrom.c $(srcdir)/chrom.h $(srcdir)/segmentpos.c $(srcdir)/segmentpos.h \
+ $(srcdir)/chrnum.c $(srcdir)/chrnum.h \
+ $(srcdir)/maxent_hr.c $(srcdir)/maxent_hr.h $(srcdir)/samflags.h $(srcdir)/samprint.c $(srcdir)/samprint.h \
+ $(srcdir)/mapq.c $(srcdir)/mapq.h $(srcdir)/shortread.c $(srcdir)/shortread.h $(srcdir)/substring.c $(srcdir)/substring.h $(srcdir)/junction.c $(srcdir)/junction.h $(srcdir)/stage3hr.c $(srcdir)/stage3hr.h \
+ $(srcdir)/spanningelt.c $(srcdir)/spanningelt.h $(srcdir)/cmet.c $(srcdir)/cmet.h $(srcdir)/atoi.c $(srcdir)/atoi.h \
+ $(srcdir)/comp.h $(srcdir)/maxent.c $(srcdir)/maxent.h $(srcdir)/pairdef.h $(srcdir)/pair.c $(srcdir)/pair.h $(srcdir)/pairpool.c $(srcdir)/pairpool.h $(srcdir)/diag.c $(srcdir)/diag.h $(srcdir)/diagpool.c $(srcdir)/diagpool.h \
+ $(srcdir)/orderstat.c $(srcdir)/orderstat.h $(srcdir)/oligoindex_hr.c $(srcdir)/oligoindex_hr.h $(srcdir)/cellpool.c $(srcdir)/cellpool.h $(srcdir)/stage2.c $(srcdir)/stage2.h \
+ $(srcdir)/intron.c $(srcdir)/intron.h $(srcdir)/boyer-moore.c $(srcdir)/boyer-moore.h $(srcdir)/changepoint.c $(srcdir)/changepoint.h $(srcdir)/pbinom.c $(srcdir)/pbinom.h \
+ $(srcdir)/dynprog.c $(srcdir)/dynprog.h $(srcdir)/dynprog_simd.c $(srcdir)/dynprog_simd.h \
+ $(srcdir)/dynprog_single.c $(srcdir)/dynprog_single.h $(srcdir)/dynprog_genome.c $(srcdir)/dynprog_genome.h $(srcdir)/dynprog_cdna.c $(srcdir)/dynprog_cdna.h $(srcdir)/dynprog_end.c $(srcdir)/dynprog_end.h \
+ $(srcdir)/gbuffer.c $(srcdir)/gbuffer.h $(srcdir)/translation.c $(srcdir)/translation.h \
+ $(srcdir)/doublelist.c $(srcdir)/doublelist.h $(srcdir)/smooth.c $(srcdir)/smooth.h \
+ $(srcdir)/chimera.c $(srcdir)/chimera.h $(srcdir)/sense.h $(srcdir)/fastlog.h $(srcdir)/stage3.c $(srcdir)/stage3.h \
+ $(srcdir)/splicestringpool.c $(srcdir)/splicestringpool.h $(srcdir)/splicetrie_build.c $(srcdir)/splicetrie_build.h $(srcdir)/splicetrie.c $(srcdir)/splicetrie.h \
+ $(srcdir)/splice.c $(srcdir)/splice.h $(srcdir)/indel.c $(srcdir)/indel.h $(srcdir)/bitpack64-access.c $(srcdir)/bitpack64-access.h \
+ $(srcdir)/bytecoding.c $(srcdir)/bytecoding.h $(srcdir)/univdiagdef.h $(srcdir)/univdiag.c $(srcdir)/univdiag.h $(srcdir)/sarray-read.c $(srcdir)/sarray-read.h \
+ $(srcdir)/stage1hr.c $(srcdir)/stage1hr.h \
+ $(srcdir)/request.c $(srcdir)/request.h $(srcdir)/resulthr.c $(srcdir)/resulthr.h $(srcdir)/output.c $(srcdir)/output.h \
+ $(srcdir)/master.c $(srcdir)/master.h \
+ $(srcdir)/inbuffer.c $(srcdir)/inbuffer.h $(srcdir)/samheader.c $(srcdir)/samheader.h $(srcdir)/outbuffer.c $(srcdir)/outbuffer.h \
+ $(srcdir)/datadir.c $(srcdir)/datadir.h $(srcdir)/mode.h \
+ $(srcdir)/getopt.c $(srcdir)/getopt1.c $(srcdir)/getopt.h $(srcdir)/gsnap.c
+
+
+# Note: dist_ commands get read by bootstrap, and don't follow the flags
+mpi_gsnap_CC = $(MPICC)
+mpi_gsnap_CFLAGS = $(MPI_CFLAGS) $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DUSE_MPI=1
+mpi_gsnap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+mpi_gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+
+# Need nodist_ to prevent sources from being copied from src/ subdirectory
+nodist_mpi_gsnap_SOURCES = $(MPI_GSNAP_FILES)
+MPI_GMAP_FILES = $(srcdir)/mpidebug.c $(srcdir)/mpidebug.h \
+ $(srcdir)/fopen.h $(srcdir)/bool.h $(srcdir)/types.h $(srcdir)/separator.h $(srcdir)/comp.h \
+ $(srcdir)/except.c $(srcdir)/except.h $(srcdir)/assert.c $(srcdir)/assert.h $(srcdir)/mem.c $(srcdir)/mem.h \
+ $(srcdir)/intlistdef.h $(srcdir)/intlist.c $(srcdir)/intlist.h $(srcdir)/listdef.h $(srcdir)/list.c $(srcdir)/list.h \
+ $(srcdir)/littleendian.c $(srcdir)/littleendian.h $(srcdir)/bigendian.c $(srcdir)/bigendian.h \
+ $(srcdir)/univinterval.c $(srcdir)/univinterval.h $(srcdir)/interval.c $(srcdir)/interval.h $(srcdir)/uintlist.c $(srcdir)/uintlist.h \
+ $(srcdir)/stopwatch.c $(srcdir)/stopwatch.h $(srcdir)/access.c $(srcdir)/access.h \
+ $(srcdir)/filestring.c $(srcdir)/filestring.h \
+ $(srcdir)/iit-read-univ.c $(srcdir)/iit-read-univ.h $(srcdir)/iitdef.h $(srcdir)/iit-read.c $(srcdir)/iit-read.h \
+ $(srcdir)/md5.c $(srcdir)/md5.h $(srcdir)/complement.h $(srcdir)/bzip2.c $(srcdir)/bzip2.h $(srcdir)/sequence.c $(srcdir)/sequence.h $(srcdir)/reader.c $(srcdir)/reader.h \
+ $(srcdir)/genomicpos.c $(srcdir)/genomicpos.h $(srcdir)/compress.c $(srcdir)/compress.h $(srcdir)/compress-write.c $(srcdir)/compress-write.h \
+ $(srcdir)/gbuffer.c $(srcdir)/gbuffer.h $(srcdir)/genome.c $(srcdir)/genome.h \
+ $(srcdir)/popcount.c $(srcdir)/popcount.h $(srcdir)/genome128_hr.c $(srcdir)/genome128_hr.h $(srcdir)/genome_sites.c $(srcdir)/genome_sites.h \
+ $(srcdir)/genome-write.c $(srcdir)/genome-write.h \
+ $(srcdir)/bitpack64-read.c $(srcdir)/bitpack64-read.h $(srcdir)/bitpack64-readtwo.c $(srcdir)/bitpack64-readtwo.h \
+ $(srcdir)/indexdbdef.h $(srcdir)/indexdb.c $(srcdir)/indexdb.h $(srcdir)/indexdb_hr.c $(srcdir)/indexdb_hr.h \
+ $(srcdir)/oligo.c $(srcdir)/oligo.h $(srcdir)/block.c $(srcdir)/block.h \
+ $(srcdir)/chrom.c $(srcdir)/chrom.h $(srcdir)/segmentpos.c $(srcdir)/segmentpos.h \
+ $(srcdir)/chrnum.c $(srcdir)/chrnum.h $(srcdir)/uinttable.c $(srcdir)/uinttable.h $(srcdir)/gregion.c $(srcdir)/gregion.h \
+ $(srcdir)/matchdef.h $(srcdir)/match.c $(srcdir)/match.h $(srcdir)/matchpool.c $(srcdir)/matchpool.h \
+ $(srcdir)/diagnostic.c $(srcdir)/diagnostic.h $(srcdir)/stage1.c $(srcdir)/stage1.h \
+ $(srcdir)/diagdef.h $(srcdir)/diag.c $(srcdir)/diag.h $(srcdir)/diagpool.c $(srcdir)/diagpool.h \
+ $(srcdir)/cmet.c $(srcdir)/cmet.h $(srcdir)/atoi.c $(srcdir)/atoi.h \
+ $(srcdir)/orderstat.c $(srcdir)/orderstat.h $(srcdir)/oligoindex_hr.c $(srcdir)/oligoindex_hr.h \
+ $(srcdir)/scores.h $(srcdir)/intron.c $(srcdir)/intron.h $(srcdir)/maxent.c $(srcdir)/maxent.h $(srcdir)/maxent_hr.c $(srcdir)/maxent_hr.h $(srcdir)/samflags.h $(srcdir)/pairdef.h $(srcdir)/pair.c $(srcdir)/pair.h \
+ $(srcdir)/pairpool.c $(srcdir)/pairpool.h $(srcdir)/cellpool.c $(srcdir)/cellpool.h $(srcdir)/stage2.c $(srcdir)/stage2.h \
+ $(srcdir)/doublelist.c $(srcdir)/doublelist.h $(srcdir)/smooth.c $(srcdir)/smooth.h \
+ $(srcdir)/splicestringpool.c $(srcdir)/splicestringpool.h $(srcdir)/splicetrie_build.c $(srcdir)/splicetrie_build.h $(srcdir)/splicetrie.c $(srcdir)/splicetrie.h \
+ $(srcdir)/boyer-moore.c $(srcdir)/boyer-moore.h \
+ $(srcdir)/dynprog.c $(srcdir)/dynprog.h $(srcdir)/dynprog_simd.c $(srcdir)/dynprog_simd.h \
+ $(srcdir)/dynprog_single.c $(srcdir)/dynprog_single.h $(srcdir)/dynprog_genome.c $(srcdir)/dynprog_genome.h $(srcdir)/dynprog_cdna.c $(srcdir)/dynprog_cdna.h $(srcdir)/dynprog_end.c $(srcdir)/dynprog_end.h \
+ $(srcdir)/translation.c $(srcdir)/translation.h \
+ $(srcdir)/pbinom.c $(srcdir)/pbinom.h $(srcdir)/changepoint.c $(srcdir)/changepoint.h $(srcdir)/sense.h $(srcdir)/fastlog.h $(srcdir)/stage3.c $(srcdir)/stage3.h \
+ $(srcdir)/request.c $(srcdir)/request.h $(srcdir)/result.c $(srcdir)/result.h $(srcdir)/output.c $(srcdir)/output.h \
+ $(srcdir)/inbuffer.c $(srcdir)/inbuffer.h $(srcdir)/samheader.c $(srcdir)/samheader.h $(srcdir)/outbuffer.c $(srcdir)/outbuffer.h \
+ $(srcdir)/chimera.c $(srcdir)/chimera.h $(srcdir)/datadir.c $(srcdir)/datadir.h \
+ $(srcdir)/getopt.c $(srcdir)/getopt1.c $(srcdir)/getopt.h $(srcdir)/gmap.c
+
+
+# Note: dist_ commands get read by bootstrap, and don't follow the flags
+mpi_gmap_CC = $(MPICC)
+mpi_gmap_CFLAGS = $(MPI_CFLAGS) $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DUSE_MPI=1
+mpi_gmap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+mpi_gmap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+nodist_mpi_gmap_SOURCES = $(MPI_GMAP_FILES)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu mpi/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu mpi/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p || test -f $$p1; \
+ then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+mpi_gsnap$(EXEEXT): $(mpi_gsnap_OBJECTS) $(mpi_gsnap_DEPENDENCIES)
+ @rm -f mpi_gsnap$(EXEEXT)
+ $(mpi_gsnap_LINK) $(mpi_gsnap_OBJECTS) $(mpi_gsnap_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-access.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-assert.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-atoi.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-bigendian.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-bitpack64-access.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-bitpack64-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-bitpack64-readtwo.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-boyer-moore.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-bytecoding.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-bzip2.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-cellpool.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-changepoint.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-chimera.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-chrnum.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-chrom.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-cmet.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-compress.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-datadir.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-diag.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-diagpool.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-doublelist.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-dynprog.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-dynprog_cdna.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-dynprog_end.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-dynprog_genome.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-dynprog_simd.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-dynprog_single.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-except.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-filestring.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-gbuffer.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-genome.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-genome128_hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-genome_sites.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-genomicpos.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-getopt.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-getopt1.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-gsnap.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-iit-read-univ.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-iit-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-inbuffer.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-indel.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-indexdb.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-indexdb_hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-interval.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-intlist.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-intron.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-junction.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-list.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-littleendian.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-mapq.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-master.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-maxent.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-maxent_hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-md5.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-mem.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-mpidebug.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-oligo.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-oligoindex_hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-orderstat.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-outbuffer.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-output.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-pair.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-pairpool.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-pbinom.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-popcount.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-reader.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-request.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-resulthr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-samheader.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-samprint.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-sarray-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-segmentpos.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-sequence.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-shortread.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-smooth.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-spanningelt.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-splice.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-splicestringpool.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-splicetrie.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-splicetrie_build.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-stage1hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-stage2.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-stage3.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-stage3hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-stopwatch.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-substring.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-translation.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-uintlist.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-univdiag.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_gsnap-univinterval.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+
+mpi_gsnap-mpidebug.o: $(srcdir)/mpidebug.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-mpidebug.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-mpidebug.Tpo -c -o mpi_gsnap-mpidebug.o `test -f '$(srcdir)/mpidebug.c' || echo '$(srcdir)/'`$(srcdir)/mpidebug.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-mpidebug.Tpo $(DEPDIR)/mpi_gsnap-mpidebug.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/mpidebug.c' object='mpi_gsnap-mpidebug.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-mpidebug.o `test -f '$(srcdir)/mpidebug.c' || echo '$(srcdir)/'`$(srcdir)/mpidebug.c
+
+mpi_gsnap-mpidebug.obj: $(srcdir)/mpidebug.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-mpidebug.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-mpidebug.Tpo -c -o mpi_gsnap-mpidebug.obj `if test -f '$(srcdir)/mpidebug.c'; then $(CYGPATH_W) '$(srcdir)/mpidebug.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/mpidebug.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-mpidebug.Tpo $(DEPDIR)/mpi_gsnap-mpidebug.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/mpidebug.c' object='mpi_gsnap-mpidebug.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-mpidebug.obj `if test -f '$(srcdir)/mpidebug.c'; then $(CYGPATH_W) '$(srcdir)/mpidebug.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/mpidebug.c'; fi`
+
+mpi_gsnap-except.o: $(srcdir)/except.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-except.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-except.Tpo -c -o mpi_gsnap-except.o `test -f '$(srcdir)/except.c' || echo '$(srcdir)/'`$(srcdir)/except.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-except.Tpo $(DEPDIR)/mpi_gsnap-except.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/except.c' object='mpi_gsnap-except.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-except.o `test -f '$(srcdir)/except.c' || echo '$(srcdir)/'`$(srcdir)/except.c
+
+mpi_gsnap-except.obj: $(srcdir)/except.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-except.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-except.Tpo -c -o mpi_gsnap-except.obj `if test -f '$(srcdir)/except.c'; then $(CYGPATH_W) '$(srcdir)/except.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/except.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-except.Tpo $(DEPDIR)/mpi_gsnap-except.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/except.c' object='mpi_gsnap-except.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-except.obj `if test -f '$(srcdir)/except.c'; then $(CYGPATH_W) '$(srcdir)/except.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/except.c'; fi`
+
+mpi_gsnap-assert.o: $(srcdir)/assert.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-assert.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-assert.Tpo -c -o mpi_gsnap-assert.o `test -f '$(srcdir)/assert.c' || echo '$(srcdir)/'`$(srcdir)/assert.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-assert.Tpo $(DEPDIR)/mpi_gsnap-assert.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/assert.c' object='mpi_gsnap-assert.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-assert.o `test -f '$(srcdir)/assert.c' || echo '$(srcdir)/'`$(srcdir)/assert.c
+
+mpi_gsnap-assert.obj: $(srcdir)/assert.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-assert.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-assert.Tpo -c -o mpi_gsnap-assert.obj `if test -f '$(srcdir)/assert.c'; then $(CYGPATH_W) '$(srcdir)/assert.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/assert.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-assert.Tpo $(DEPDIR)/mpi_gsnap-assert.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/assert.c' object='mpi_gsnap-assert.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-assert.obj `if test -f '$(srcdir)/assert.c'; then $(CYGPATH_W) '$(srcdir)/assert.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/assert.c'; fi`
+
+mpi_gsnap-mem.o: $(srcdir)/mem.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-mem.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-mem.Tpo -c -o mpi_gsnap-mem.o `test -f '$(srcdir)/mem.c' || echo '$(srcdir)/'`$(srcdir)/mem.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-mem.Tpo $(DEPDIR)/mpi_gsnap-mem.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/mem.c' object='mpi_gsnap-mem.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-mem.o `test -f '$(srcdir)/mem.c' || echo '$(srcdir)/'`$(srcdir)/mem.c
+
+mpi_gsnap-mem.obj: $(srcdir)/mem.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-mem.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-mem.Tpo -c -o mpi_gsnap-mem.obj `if test -f '$(srcdir)/mem.c'; then $(CYGPATH_W) '$(srcdir)/mem.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/mem.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-mem.Tpo $(DEPDIR)/mpi_gsnap-mem.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/mem.c' object='mpi_gsnap-mem.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-mem.obj `if test -f '$(srcdir)/mem.c'; then $(CYGPATH_W) '$(srcdir)/mem.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/mem.c'; fi`
+
+mpi_gsnap-intlist.o: $(srcdir)/intlist.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-intlist.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-intlist.Tpo -c -o mpi_gsnap-intlist.o `test -f '$(srcdir)/intlist.c' || echo '$(srcdir)/'`$(srcdir)/intlist.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-intlist.Tpo $(DEPDIR)/mpi_gsnap-intlist.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/intlist.c' object='mpi_gsnap-intlist.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-intlist.o `test -f '$(srcdir)/intlist.c' || echo '$(srcdir)/'`$(srcdir)/intlist.c
+
+mpi_gsnap-intlist.obj: $(srcdir)/intlist.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-intlist.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-intlist.Tpo -c -o mpi_gsnap-intlist.obj `if test -f '$(srcdir)/intlist.c'; then $(CYGPATH_W) '$(srcdir)/intlist.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/intlist.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-intlist.Tpo $(DEPDIR)/mpi_gsnap-intlist.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/intlist.c' object='mpi_gsnap-intlist.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-intlist.obj `if test -f '$(srcdir)/intlist.c'; then $(CYGPATH_W) '$(srcdir)/intlist.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/intlist.c'; fi`
+
+mpi_gsnap-list.o: $(srcdir)/list.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-list.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-list.Tpo -c -o mpi_gsnap-list.o `test -f '$(srcdir)/list.c' || echo '$(srcdir)/'`$(srcdir)/list.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-list.Tpo $(DEPDIR)/mpi_gsnap-list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/list.c' object='mpi_gsnap-list.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-list.o `test -f '$(srcdir)/list.c' || echo '$(srcdir)/'`$(srcdir)/list.c
+
+mpi_gsnap-list.obj: $(srcdir)/list.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-list.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-list.Tpo -c -o mpi_gsnap-list.obj `if test -f '$(srcdir)/list.c'; then $(CYGPATH_W) '$(srcdir)/list.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/list.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-list.Tpo $(DEPDIR)/mpi_gsnap-list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/list.c' object='mpi_gsnap-list.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-list.obj `if test -f '$(srcdir)/list.c'; then $(CYGPATH_W) '$(srcdir)/list.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/list.c'; fi`
+
+mpi_gsnap-littleendian.o: $(srcdir)/littleendian.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-littleendian.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-littleendian.Tpo -c -o mpi_gsnap-littleendian.o `test -f '$(srcdir)/littleendian.c' || echo '$(srcdir)/'`$(srcdir)/littleendian.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-littleendian.Tpo $(DEPDIR)/mpi_gsnap-littleendian.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/littleendian.c' object='mpi_gsnap-littleendian.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-littleendian.o `test -f '$(srcdir)/littleendian.c' || echo '$(srcdir)/'`$(srcdir)/littleendian.c
+
+mpi_gsnap-littleendian.obj: $(srcdir)/littleendian.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-littleendian.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-littleendian.Tpo -c -o mpi_gsnap-littleendian.obj `if test -f '$(srcdir)/littleendian.c'; then $(CYGPATH_W) '$(srcdir)/littleendian.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/littleendian.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-littleendian.Tpo $(DEPDIR)/mpi_gsnap-littleendian.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/littleendian.c' object='mpi_gsnap-littleendian.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-littleendian.obj `if test -f '$(srcdir)/littleendian.c'; then $(CYGPATH_W) '$(srcdir)/littleendian.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/littleendian.c'; fi`
+
+mpi_gsnap-bigendian.o: $(srcdir)/bigendian.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bigendian.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-bigendian.Tpo -c -o mpi_gsnap-bigendian.o `test -f '$(srcdir)/bigendian.c' || echo '$(srcdir)/'`$(srcdir)/bigendian.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bigendian.Tpo $(DEPDIR)/mpi_gsnap-bigendian.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bigendian.c' object='mpi_gsnap-bigendian.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bigendian.o `test -f '$(srcdir)/bigendian.c' || echo '$(srcdir)/'`$(srcdir)/bigendian.c
+
+mpi_gsnap-bigendian.obj: $(srcdir)/bigendian.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bigendian.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-bigendian.Tpo -c -o mpi_gsnap-bigendian.obj `if test -f '$(srcdir)/bigendian.c'; then $(CYGPATH_W) '$(srcdir)/bigendian.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bigendian.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bigendian.Tpo $(DEPDIR)/mpi_gsnap-bigendian.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bigendian.c' object='mpi_gsnap-bigendian.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bigendian.obj `if test -f '$(srcdir)/bigendian.c'; then $(CYGPATH_W) '$(srcdir)/bigendian.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bigendian.c'; fi`
+
+mpi_gsnap-univinterval.o: $(srcdir)/univinterval.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-univinterval.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-univinterval.Tpo -c -o mpi_gsnap-univinterval.o `test -f '$(srcdir)/univinterval.c' || echo '$(srcdir)/'`$(srcdir)/univinterval.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-univinterval.Tpo $(DEPDIR)/mpi_gsnap-univinterval.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/univinterval.c' object='mpi_gsnap-univinterval.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-univinterval.o `test -f '$(srcdir)/univinterval.c' || echo '$(srcdir)/'`$(srcdir)/univinterval.c
+
+mpi_gsnap-univinterval.obj: $(srcdir)/univinterval.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-univinterval.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-univinterval.Tpo -c -o mpi_gsnap-univinterval.obj `if test -f '$(srcdir)/univinterval.c'; then $(CYGPATH_W) '$(srcdir)/univinterval.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/univinterval.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-univinterval.Tpo $(DEPDIR)/mpi_gsnap-univinterval.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/univinterval.c' object='mpi_gsnap-univinterval.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-univinterval.obj `if test -f '$(srcdir)/univinterval.c'; then $(CYGPATH_W) '$(srcdir)/univinterval.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/univinterval.c'; fi`
+
+mpi_gsnap-interval.o: $(srcdir)/interval.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-interval.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-interval.Tpo -c -o mpi_gsnap-interval.o `test -f '$(srcdir)/interval.c' || echo '$(srcdir)/'`$(srcdir)/interval.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-interval.Tpo $(DEPDIR)/mpi_gsnap-interval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/interval.c' object='mpi_gsnap-interval.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-interval.o `test -f '$(srcdir)/interval.c' || echo '$(srcdir)/'`$(srcdir)/interval.c
+
+mpi_gsnap-interval.obj: $(srcdir)/interval.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-interval.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-interval.Tpo -c -o mpi_gsnap-interval.obj `if test -f '$(srcdir)/interval.c'; then $(CYGPATH_W) '$(srcdir)/interval.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/interval.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-interval.Tpo $(DEPDIR)/mpi_gsnap-interval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/interval.c' object='mpi_gsnap-interval.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-interval.obj `if test -f '$(srcdir)/interval.c'; then $(CYGPATH_W) '$(srcdir)/interval.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/interval.c'; fi`
+
+mpi_gsnap-uintlist.o: $(srcdir)/uintlist.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-uintlist.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-uintlist.Tpo -c -o mpi_gsnap-uintlist.o `test -f '$(srcdir)/uintlist.c' || echo '$(srcdir)/'`$(srcdir)/uintlist.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-uintlist.Tpo $(DEPDIR)/mpi_gsnap-uintlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/uintlist.c' object='mpi_gsnap-uintlist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-uintlist.o `test -f '$(srcdir)/uintlist.c' || echo '$(srcdir)/'`$(srcdir)/uintlist.c
+
+mpi_gsnap-uintlist.obj: $(srcdir)/uintlist.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-uintlist.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-uintlist.Tpo -c -o mpi_gsnap-uintlist.obj `if test -f '$(srcdir)/uintlist.c'; then $(CYGPATH_W) '$(srcdir)/uintlist.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/uintlist.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-uintlist.Tpo $(DEPDIR)/mpi_gsnap-uintlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/uintlist.c' object='mpi_gsnap-uintlist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-uintlist.obj `if test -f '$(srcdir)/uintlist.c'; then $(CYGPATH_W) '$(srcdir)/uintlist.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/uintlist.c'; fi`
+
+mpi_gsnap-stopwatch.o: $(srcdir)/stopwatch.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stopwatch.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-stopwatch.Tpo -c -o mpi_gsnap-stopwatch.o `test -f '$(srcdir)/stopwatch.c' || echo '$(srcdir)/'`$(srcdir)/stopwatch.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stopwatch.Tpo $(DEPDIR)/mpi_gsnap-stopwatch.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stopwatch.c' object='mpi_gsnap-stopwatch.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stopwatch.o `test -f '$(srcdir)/stopwatch.c' || echo '$(srcdir)/'`$(srcdir)/stopwatch.c
+
+mpi_gsnap-stopwatch.obj: $(srcdir)/stopwatch.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stopwatch.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-stopwatch.Tpo -c -o mpi_gsnap-stopwatch.obj `if test -f '$(srcdir)/stopwatch.c'; then $(CYGPATH_W) '$(srcdir)/stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stopwatch.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stopwatch.Tpo $(DEPDIR)/mpi_gsnap-stopwatch.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stopwatch.c' object='mpi_gsnap-stopwatch.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stopwatch.obj `if test -f '$(srcdir)/stopwatch.c'; then $(CYGPATH_W) '$(srcdir)/stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stopwatch.c'; fi`
+
+mpi_gsnap-access.o: $(srcdir)/access.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-access.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-access.Tpo -c -o mpi_gsnap-access.o `test -f '$(srcdir)/access.c' || echo '$(srcdir)/'`$(srcdir)/access.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-access.Tpo $(DEPDIR)/mpi_gsnap-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/access.c' object='mpi_gsnap-access.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-access.o `test -f '$(srcdir)/access.c' || echo '$(srcdir)/'`$(srcdir)/access.c
+
+mpi_gsnap-access.obj: $(srcdir)/access.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-access.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-access.Tpo -c -o mpi_gsnap-access.obj `if test -f '$(srcdir)/access.c'; then $(CYGPATH_W) '$(srcdir)/access.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/access.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-access.Tpo $(DEPDIR)/mpi_gsnap-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/access.c' object='mpi_gsnap-access.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-access.obj `if test -f '$(srcdir)/access.c'; then $(CYGPATH_W) '$(srcdir)/access.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/access.c'; fi`
+
+mpi_gsnap-filestring.o: $(srcdir)/filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-filestring.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-filestring.Tpo -c -o mpi_gsnap-filestring.o `test -f '$(srcdir)/filestring.c' || echo '$(srcdir)/'`$(srcdir)/filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-filestring.Tpo $(DEPDIR)/mpi_gsnap-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/filestring.c' object='mpi_gsnap-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-filestring.o `test -f '$(srcdir)/filestring.c' || echo '$(srcdir)/'`$(srcdir)/filestring.c
+
+mpi_gsnap-filestring.obj: $(srcdir)/filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-filestring.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-filestring.Tpo -c -o mpi_gsnap-filestring.obj `if test -f '$(srcdir)/filestring.c'; then $(CYGPATH_W) '$(srcdir)/filestring.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-filestring.Tpo $(DEPDIR)/mpi_gsnap-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/filestring.c' object='mpi_gsnap-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-filestring.obj `if test -f '$(srcdir)/filestring.c'; then $(CYGPATH_W) '$(srcdir)/filestring.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/filestring.c'; fi`
+
+mpi_gsnap-iit-read-univ.o: $(srcdir)/iit-read-univ.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-iit-read-univ.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-iit-read-univ.Tpo -c -o mpi_gsnap-iit-read-univ.o `test -f '$(srcdir)/iit-read-univ.c' || echo '$(srcdir)/'`$(srcdir)/iit-read-univ.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-iit-read-univ.Tpo $(DEPDIR)/mpi_gsnap-iit-read-univ.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/iit-read-univ.c' object='mpi_gsnap-iit-read-univ.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-iit-read-univ.o `test -f '$(srcdir)/iit-read-univ.c' || echo '$(srcdir)/'`$(srcdir)/iit-read-univ.c
+
+mpi_gsnap-iit-read-univ.obj: $(srcdir)/iit-read-univ.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-iit-read-univ.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-iit-read-univ.Tpo -c -o mpi_gsnap-iit-read-univ.obj `if test -f '$(srcdir)/iit-read-univ.c'; then $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/iit-read-univ.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-iit-read-univ.Tpo $(DEPDIR)/mpi_gsnap-iit-read-univ.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/iit-read-univ.c' object='mpi_gsnap-iit-read-univ.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-iit-read-univ.obj `if test -f '$(srcdir)/iit-read-univ.c'; then $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/iit-read-univ.c'; fi`
+
+mpi_gsnap-iit-read.o: $(srcdir)/iit-read.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-iit-read.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-iit-read.Tpo -c -o mpi_gsnap-iit-read.o `test -f '$(srcdir)/iit-read.c' || echo '$(srcdir)/'`$(srcdir)/iit-read.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-iit-read.Tpo $(DEPDIR)/mpi_gsnap-iit-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/iit-read.c' object='mpi_gsnap-iit-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-iit-read.o `test -f '$(srcdir)/iit-read.c' || echo '$(srcdir)/'`$(srcdir)/iit-read.c
+
+mpi_gsnap-iit-read.obj: $(srcdir)/iit-read.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-iit-read.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-iit-read.Tpo -c -o mpi_gsnap-iit-read.obj `if test -f '$(srcdir)/iit-read.c'; then $(CYGPATH_W) '$(srcdir)/iit-read.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/iit-read.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-iit-read.Tpo $(DEPDIR)/mpi_gsnap-iit-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/iit-read.c' object='mpi_gsnap-iit-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-iit-read.obj `if test -f '$(srcdir)/iit-read.c'; then $(CYGPATH_W) '$(srcdir)/iit-read.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/iit-read.c'; fi`
+
+mpi_gsnap-md5.o: $(srcdir)/md5.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-md5.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-md5.Tpo -c -o mpi_gsnap-md5.o `test -f '$(srcdir)/md5.c' || echo '$(srcdir)/'`$(srcdir)/md5.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-md5.Tpo $(DEPDIR)/mpi_gsnap-md5.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/md5.c' object='mpi_gsnap-md5.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-md5.o `test -f '$(srcdir)/md5.c' || echo '$(srcdir)/'`$(srcdir)/md5.c
+
+mpi_gsnap-md5.obj: $(srcdir)/md5.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-md5.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-md5.Tpo -c -o mpi_gsnap-md5.obj `if test -f '$(srcdir)/md5.c'; then $(CYGPATH_W) '$(srcdir)/md5.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/md5.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-md5.Tpo $(DEPDIR)/mpi_gsnap-md5.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/md5.c' object='mpi_gsnap-md5.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-md5.obj `if test -f '$(srcdir)/md5.c'; then $(CYGPATH_W) '$(srcdir)/md5.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/md5.c'; fi`
+
+mpi_gsnap-bzip2.o: $(srcdir)/bzip2.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bzip2.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-bzip2.Tpo -c -o mpi_gsnap-bzip2.o `test -f '$(srcdir)/bzip2.c' || echo '$(srcdir)/'`$(srcdir)/bzip2.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bzip2.Tpo $(DEPDIR)/mpi_gsnap-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bzip2.c' object='mpi_gsnap-bzip2.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bzip2.o `test -f '$(srcdir)/bzip2.c' || echo '$(srcdir)/'`$(srcdir)/bzip2.c
+
+mpi_gsnap-bzip2.obj: $(srcdir)/bzip2.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bzip2.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-bzip2.Tpo -c -o mpi_gsnap-bzip2.obj `if test -f '$(srcdir)/bzip2.c'; then $(CYGPATH_W) '$(srcdir)/bzip2.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bzip2.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bzip2.Tpo $(DEPDIR)/mpi_gsnap-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bzip2.c' object='mpi_gsnap-bzip2.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bzip2.obj `if test -f '$(srcdir)/bzip2.c'; then $(CYGPATH_W) '$(srcdir)/bzip2.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bzip2.c'; fi`
+
+mpi_gsnap-sequence.o: $(srcdir)/sequence.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-sequence.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-sequence.Tpo -c -o mpi_gsnap-sequence.o `test -f '$(srcdir)/sequence.c' || echo '$(srcdir)/'`$(srcdir)/sequence.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-sequence.Tpo $(DEPDIR)/mpi_gsnap-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/sequence.c' object='mpi_gsnap-sequence.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-sequence.o `test -f '$(srcdir)/sequence.c' || echo '$(srcdir)/'`$(srcdir)/sequence.c
+
+mpi_gsnap-sequence.obj: $(srcdir)/sequence.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-sequence.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-sequence.Tpo -c -o mpi_gsnap-sequence.obj `if test -f '$(srcdir)/sequence.c'; then $(CYGPATH_W) '$(srcdir)/sequence.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/sequence.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-sequence.Tpo $(DEPDIR)/mpi_gsnap-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/sequence.c' object='mpi_gsnap-sequence.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-sequence.obj `if test -f '$(srcdir)/sequence.c'; then $(CYGPATH_W) '$(srcdir)/sequence.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/sequence.c'; fi`
+
+mpi_gsnap-reader.o: $(srcdir)/reader.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-reader.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-reader.Tpo -c -o mpi_gsnap-reader.o `test -f '$(srcdir)/reader.c' || echo '$(srcdir)/'`$(srcdir)/reader.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-reader.Tpo $(DEPDIR)/mpi_gsnap-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/reader.c' object='mpi_gsnap-reader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-reader.o `test -f '$(srcdir)/reader.c' || echo '$(srcdir)/'`$(srcdir)/reader.c
+
+mpi_gsnap-reader.obj: $(srcdir)/reader.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-reader.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-reader.Tpo -c -o mpi_gsnap-reader.obj `if test -f '$(srcdir)/reader.c'; then $(CYGPATH_W) '$(srcdir)/reader.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/reader.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-reader.Tpo $(DEPDIR)/mpi_gsnap-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/reader.c' object='mpi_gsnap-reader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-reader.obj `if test -f '$(srcdir)/reader.c'; then $(CYGPATH_W) '$(srcdir)/reader.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/reader.c'; fi`
+
+mpi_gsnap-genomicpos.o: $(srcdir)/genomicpos.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-genomicpos.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-genomicpos.Tpo -c -o mpi_gsnap-genomicpos.o `test -f '$(srcdir)/genomicpos.c' || echo '$(srcdir)/'`$(srcdir)/genomicpos.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-genomicpos.Tpo $(DEPDIR)/mpi_gsnap-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/genomicpos.c' object='mpi_gsnap-genomicpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-genomicpos.o `test -f '$(srcdir)/genomicpos.c' || echo '$(srcdir)/'`$(srcdir)/genomicpos.c
+
+mpi_gsnap-genomicpos.obj: $(srcdir)/genomicpos.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-genomicpos.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-genomicpos.Tpo -c -o mpi_gsnap-genomicpos.obj `if test -f '$(srcdir)/genomicpos.c'; then $(CYGPATH_W) '$(srcdir)/genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/genomicpos.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-genomicpos.Tpo $(DEPDIR)/mpi_gsnap-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/genomicpos.c' object='mpi_gsnap-genomicpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-genomicpos.obj `if test -f '$(srcdir)/genomicpos.c'; then $(CYGPATH_W) '$(srcdir)/genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/genomicpos.c'; fi`
+
+mpi_gsnap-compress.o: $(srcdir)/compress.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-compress.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-compress.Tpo -c -o mpi_gsnap-compress.o `test -f '$(srcdir)/compress.c' || echo '$(srcdir)/'`$(srcdir)/compress.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-compress.Tpo $(DEPDIR)/mpi_gsnap-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/compress.c' object='mpi_gsnap-compress.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-compress.o `test -f '$(srcdir)/compress.c' || echo '$(srcdir)/'`$(srcdir)/compress.c
+
+mpi_gsnap-compress.obj: $(srcdir)/compress.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-compress.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-compress.Tpo -c -o mpi_gsnap-compress.obj `if test -f '$(srcdir)/compress.c'; then $(CYGPATH_W) '$(srcdir)/compress.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/compress.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-compress.Tpo $(DEPDIR)/mpi_gsnap-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/compress.c' object='mpi_gsnap-compress.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-compress.obj `if test -f '$(srcdir)/compress.c'; then $(CYGPATH_W) '$(srcdir)/compress.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/compress.c'; fi`
+
+mpi_gsnap-genome.o: $(srcdir)/genome.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-genome.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-genome.Tpo -c -o mpi_gsnap-genome.o `test -f '$(srcdir)/genome.c' || echo '$(srcdir)/'`$(srcdir)/genome.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-genome.Tpo $(DEPDIR)/mpi_gsnap-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/genome.c' object='mpi_gsnap-genome.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-genome.o `test -f '$(srcdir)/genome.c' || echo '$(srcdir)/'`$(srcdir)/genome.c
+
+mpi_gsnap-genome.obj: $(srcdir)/genome.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-genome.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-genome.Tpo -c -o mpi_gsnap-genome.obj `if test -f '$(srcdir)/genome.c'; then $(CYGPATH_W) '$(srcdir)/genome.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/genome.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-genome.Tpo $(DEPDIR)/mpi_gsnap-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/genome.c' object='mpi_gsnap-genome.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-genome.obj `if test -f '$(srcdir)/genome.c'; then $(CYGPATH_W) '$(srcdir)/genome.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/genome.c'; fi`
+
+mpi_gsnap-popcount.o: $(srcdir)/popcount.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-popcount.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-popcount.Tpo -c -o mpi_gsnap-popcount.o `test -f '$(srcdir)/popcount.c' || echo '$(srcdir)/'`$(srcdir)/popcount.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-popcount.Tpo $(DEPDIR)/mpi_gsnap-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/popcount.c' object='mpi_gsnap-popcount.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-popcount.o `test -f '$(srcdir)/popcount.c' || echo '$(srcdir)/'`$(srcdir)/popcount.c
+
+mpi_gsnap-popcount.obj: $(srcdir)/popcount.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-popcount.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-popcount.Tpo -c -o mpi_gsnap-popcount.obj `if test -f '$(srcdir)/popcount.c'; then $(CYGPATH_W) '$(srcdir)/popcount.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/popcount.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-popcount.Tpo $(DEPDIR)/mpi_gsnap-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/popcount.c' object='mpi_gsnap-popcount.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-popcount.obj `if test -f '$(srcdir)/popcount.c'; then $(CYGPATH_W) '$(srcdir)/popcount.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/popcount.c'; fi`
+
+mpi_gsnap-genome128_hr.o: $(srcdir)/genome128_hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-genome128_hr.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-genome128_hr.Tpo -c -o mpi_gsnap-genome128_hr.o `test -f '$(srcdir)/genome128_hr.c' || echo '$(srcdir)/'`$(srcdir)/genome128_hr.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-genome128_hr.Tpo $(DEPDIR)/mpi_gsnap-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/genome128_hr.c' object='mpi_gsnap-genome128_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-genome128_hr.o `test -f '$(srcdir)/genome128_hr.c' || echo '$(srcdir)/'`$(srcdir)/genome128_hr.c
+
+mpi_gsnap-genome128_hr.obj: $(srcdir)/genome128_hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-genome128_hr.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-genome128_hr.Tpo -c -o mpi_gsnap-genome128_hr.obj `if test -f '$(srcdir)/genome128_hr.c'; then $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/genome128_hr.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-genome128_hr.Tpo $(DEPDIR)/mpi_gsnap-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/genome128_hr.c' object='mpi_gsnap-genome128_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-genome128_hr.obj `if test -f '$(srcdir)/genome128_hr.c'; then $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/genome128_hr.c'; fi`
+
+mpi_gsnap-genome_sites.o: $(srcdir)/genome_sites.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-genome_sites.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-genome_sites.Tpo -c -o mpi_gsnap-genome_sites.o `test -f '$(srcdir)/genome_sites.c' || echo '$(srcdir)/'`$(srcdir)/genome_sites.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-genome_sites.Tpo $(DEPDIR)/mpi_gsnap-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/genome_sites.c' object='mpi_gsnap-genome_sites.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-genome_sites.o `test -f '$(srcdir)/genome_sites.c' || echo '$(srcdir)/'`$(srcdir)/genome_sites.c
+
+mpi_gsnap-genome_sites.obj: $(srcdir)/genome_sites.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-genome_sites.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-genome_sites.Tpo -c -o mpi_gsnap-genome_sites.obj `if test -f '$(srcdir)/genome_sites.c'; then $(CYGPATH_W) '$(srcdir)/genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/genome_sites.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-genome_sites.Tpo $(DEPDIR)/mpi_gsnap-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/genome_sites.c' object='mpi_gsnap-genome_sites.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-genome_sites.obj `if test -f '$(srcdir)/genome_sites.c'; then $(CYGPATH_W) '$(srcdir)/genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/genome_sites.c'; fi`
+
+mpi_gsnap-bitpack64-read.o: $(srcdir)/bitpack64-read.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bitpack64-read.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-bitpack64-read.Tpo -c -o mpi_gsnap-bitpack64-read.o `test -f '$(srcdir)/bitpack64-read.c' || echo '$(srcdir)/'`$(srcdir)/bitpack64-read.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bitpack64-read.Tpo $(DEPDIR)/mpi_gsnap-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bitpack64-read.c' object='mpi_gsnap-bitpack64-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bitpack64-read.o `test -f '$(srcdir)/bitpack64-read.c' || echo '$(srcdir)/'`$(srcdir)/bitpack64-read.c
+
+mpi_gsnap-bitpack64-read.obj: $(srcdir)/bitpack64-read.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bitpack64-read.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-bitpack64-read.Tpo -c -o mpi_gsnap-bitpack64-read.obj `if test -f '$(srcdir)/bitpack64-read.c'; then $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bitpack64-read.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bitpack64-read.Tpo $(DEPDIR)/mpi_gsnap-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bitpack64-read.c' object='mpi_gsnap-bitpack64-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bitpack64-read.obj `if test -f '$(srcdir)/bitpack64-read.c'; then $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bitpack64-read.c'; fi`
+
+mpi_gsnap-bitpack64-readtwo.o: $(srcdir)/bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bitpack64-readtwo.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-bitpack64-readtwo.Tpo -c -o mpi_gsnap-bitpack64-readtwo.o `test -f '$(srcdir)/bitpack64-readtwo.c' || echo '$(srcdir)/'`$(srcdir)/bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bitpack64-readtwo.Tpo $(DEPDIR)/mpi_gsnap-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bitpack64-readtwo.c' object='mpi_gsnap-bitpack64-readtwo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bitpack64-readtwo.o `test -f '$(srcdir)/bitpack64-readtwo.c' || echo '$(srcdir)/'`$(srcdir)/bitpack64-readtwo.c
+
+mpi_gsnap-bitpack64-readtwo.obj: $(srcdir)/bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bitpack64-readtwo.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-bitpack64-readtwo.Tpo -c -o mpi_gsnap-bitpack64-readtwo.obj `if test -f '$(srcdir)/bitpack64-readtwo.c'; then $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bitpack64-readtwo.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bitpack64-readtwo.Tpo $(DEPDIR)/mpi_gsnap-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bitpack64-readtwo.c' object='mpi_gsnap-bitpack64-readtwo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bitpack64-readtwo.obj `if test -f '$(srcdir)/bitpack64-readtwo.c'; then $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bitpack64-readtwo.c'; fi`
+
+mpi_gsnap-indexdb.o: $(srcdir)/indexdb.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-indexdb.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-indexdb.Tpo -c -o mpi_gsnap-indexdb.o `test -f '$(srcdir)/indexdb.c' || echo '$(srcdir)/'`$(srcdir)/indexdb.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-indexdb.Tpo $(DEPDIR)/mpi_gsnap-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/indexdb.c' object='mpi_gsnap-indexdb.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-indexdb.o `test -f '$(srcdir)/indexdb.c' || echo '$(srcdir)/'`$(srcdir)/indexdb.c
+
+mpi_gsnap-indexdb.obj: $(srcdir)/indexdb.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-indexdb.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-indexdb.Tpo -c -o mpi_gsnap-indexdb.obj `if test -f '$(srcdir)/indexdb.c'; then $(CYGPATH_W) '$(srcdir)/indexdb.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/indexdb.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-indexdb.Tpo $(DEPDIR)/mpi_gsnap-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/indexdb.c' object='mpi_gsnap-indexdb.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-indexdb.obj `if test -f '$(srcdir)/indexdb.c'; then $(CYGPATH_W) '$(srcdir)/indexdb.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/indexdb.c'; fi`
+
+mpi_gsnap-indexdb_hr.o: $(srcdir)/indexdb_hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-indexdb_hr.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-indexdb_hr.Tpo -c -o mpi_gsnap-indexdb_hr.o `test -f '$(srcdir)/indexdb_hr.c' || echo '$(srcdir)/'`$(srcdir)/indexdb_hr.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-indexdb_hr.Tpo $(DEPDIR)/mpi_gsnap-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/indexdb_hr.c' object='mpi_gsnap-indexdb_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-indexdb_hr.o `test -f '$(srcdir)/indexdb_hr.c' || echo '$(srcdir)/'`$(srcdir)/indexdb_hr.c
+
+mpi_gsnap-indexdb_hr.obj: $(srcdir)/indexdb_hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-indexdb_hr.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-indexdb_hr.Tpo -c -o mpi_gsnap-indexdb_hr.obj `if test -f '$(srcdir)/indexdb_hr.c'; then $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/indexdb_hr.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-indexdb_hr.Tpo $(DEPDIR)/mpi_gsnap-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/indexdb_hr.c' object='mpi_gsnap-indexdb_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-indexdb_hr.obj `if test -f '$(srcdir)/indexdb_hr.c'; then $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/indexdb_hr.c'; fi`
+
+mpi_gsnap-oligo.o: $(srcdir)/oligo.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-oligo.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-oligo.Tpo -c -o mpi_gsnap-oligo.o `test -f '$(srcdir)/oligo.c' || echo '$(srcdir)/'`$(srcdir)/oligo.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-oligo.Tpo $(DEPDIR)/mpi_gsnap-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/oligo.c' object='mpi_gsnap-oligo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-oligo.o `test -f '$(srcdir)/oligo.c' || echo '$(srcdir)/'`$(srcdir)/oligo.c
+
+mpi_gsnap-oligo.obj: $(srcdir)/oligo.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-oligo.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-oligo.Tpo -c -o mpi_gsnap-oligo.obj `if test -f '$(srcdir)/oligo.c'; then $(CYGPATH_W) '$(srcdir)/oligo.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/oligo.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-oligo.Tpo $(DEPDIR)/mpi_gsnap-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/oligo.c' object='mpi_gsnap-oligo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-oligo.obj `if test -f '$(srcdir)/oligo.c'; then $(CYGPATH_W) '$(srcdir)/oligo.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/oligo.c'; fi`
+
+mpi_gsnap-chrom.o: $(srcdir)/chrom.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-chrom.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-chrom.Tpo -c -o mpi_gsnap-chrom.o `test -f '$(srcdir)/chrom.c' || echo '$(srcdir)/'`$(srcdir)/chrom.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-chrom.Tpo $(DEPDIR)/mpi_gsnap-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/chrom.c' object='mpi_gsnap-chrom.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-chrom.o `test -f '$(srcdir)/chrom.c' || echo '$(srcdir)/'`$(srcdir)/chrom.c
+
+mpi_gsnap-chrom.obj: $(srcdir)/chrom.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-chrom.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-chrom.Tpo -c -o mpi_gsnap-chrom.obj `if test -f '$(srcdir)/chrom.c'; then $(CYGPATH_W) '$(srcdir)/chrom.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/chrom.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-chrom.Tpo $(DEPDIR)/mpi_gsnap-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/chrom.c' object='mpi_gsnap-chrom.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-chrom.obj `if test -f '$(srcdir)/chrom.c'; then $(CYGPATH_W) '$(srcdir)/chrom.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/chrom.c'; fi`
+
+mpi_gsnap-segmentpos.o: $(srcdir)/segmentpos.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-segmentpos.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-segmentpos.Tpo -c -o mpi_gsnap-segmentpos.o `test -f '$(srcdir)/segmentpos.c' || echo '$(srcdir)/'`$(srcdir)/segmentpos.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-segmentpos.Tpo $(DEPDIR)/mpi_gsnap-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/segmentpos.c' object='mpi_gsnap-segmentpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-segmentpos.o `test -f '$(srcdir)/segmentpos.c' || echo '$(srcdir)/'`$(srcdir)/segmentpos.c
+
+mpi_gsnap-segmentpos.obj: $(srcdir)/segmentpos.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-segmentpos.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-segmentpos.Tpo -c -o mpi_gsnap-segmentpos.obj `if test -f '$(srcdir)/segmentpos.c'; then $(CYGPATH_W) '$(srcdir)/segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/segmentpos.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-segmentpos.Tpo $(DEPDIR)/mpi_gsnap-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/segmentpos.c' object='mpi_gsnap-segmentpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-segmentpos.obj `if test -f '$(srcdir)/segmentpos.c'; then $(CYGPATH_W) '$(srcdir)/segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/segmentpos.c'; fi`
+
+mpi_gsnap-chrnum.o: $(srcdir)/chrnum.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-chrnum.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-chrnum.Tpo -c -o mpi_gsnap-chrnum.o `test -f '$(srcdir)/chrnum.c' || echo '$(srcdir)/'`$(srcdir)/chrnum.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-chrnum.Tpo $(DEPDIR)/mpi_gsnap-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/chrnum.c' object='mpi_gsnap-chrnum.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-chrnum.o `test -f '$(srcdir)/chrnum.c' || echo '$(srcdir)/'`$(srcdir)/chrnum.c
+
+mpi_gsnap-chrnum.obj: $(srcdir)/chrnum.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-chrnum.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-chrnum.Tpo -c -o mpi_gsnap-chrnum.obj `if test -f '$(srcdir)/chrnum.c'; then $(CYGPATH_W) '$(srcdir)/chrnum.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/chrnum.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-chrnum.Tpo $(DEPDIR)/mpi_gsnap-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/chrnum.c' object='mpi_gsnap-chrnum.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-chrnum.obj `if test -f '$(srcdir)/chrnum.c'; then $(CYGPATH_W) '$(srcdir)/chrnum.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/chrnum.c'; fi`
+
+mpi_gsnap-maxent_hr.o: $(srcdir)/maxent_hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-maxent_hr.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-maxent_hr.Tpo -c -o mpi_gsnap-maxent_hr.o `test -f '$(srcdir)/maxent_hr.c' || echo '$(srcdir)/'`$(srcdir)/maxent_hr.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-maxent_hr.Tpo $(DEPDIR)/mpi_gsnap-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/maxent_hr.c' object='mpi_gsnap-maxent_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-maxent_hr.o `test -f '$(srcdir)/maxent_hr.c' || echo '$(srcdir)/'`$(srcdir)/maxent_hr.c
+
+mpi_gsnap-maxent_hr.obj: $(srcdir)/maxent_hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-maxent_hr.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-maxent_hr.Tpo -c -o mpi_gsnap-maxent_hr.obj `if test -f '$(srcdir)/maxent_hr.c'; then $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/maxent_hr.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-maxent_hr.Tpo $(DEPDIR)/mpi_gsnap-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/maxent_hr.c' object='mpi_gsnap-maxent_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-maxent_hr.obj `if test -f '$(srcdir)/maxent_hr.c'; then $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/maxent_hr.c'; fi`
+
+mpi_gsnap-samprint.o: $(srcdir)/samprint.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-samprint.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-samprint.Tpo -c -o mpi_gsnap-samprint.o `test -f '$(srcdir)/samprint.c' || echo '$(srcdir)/'`$(srcdir)/samprint.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-samprint.Tpo $(DEPDIR)/mpi_gsnap-samprint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/samprint.c' object='mpi_gsnap-samprint.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-samprint.o `test -f '$(srcdir)/samprint.c' || echo '$(srcdir)/'`$(srcdir)/samprint.c
+
+mpi_gsnap-samprint.obj: $(srcdir)/samprint.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-samprint.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-samprint.Tpo -c -o mpi_gsnap-samprint.obj `if test -f '$(srcdir)/samprint.c'; then $(CYGPATH_W) '$(srcdir)/samprint.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/samprint.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-samprint.Tpo $(DEPDIR)/mpi_gsnap-samprint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/samprint.c' object='mpi_gsnap-samprint.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-samprint.obj `if test -f '$(srcdir)/samprint.c'; then $(CYGPATH_W) '$(srcdir)/samprint.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/samprint.c'; fi`
+
+mpi_gsnap-mapq.o: $(srcdir)/mapq.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-mapq.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-mapq.Tpo -c -o mpi_gsnap-mapq.o `test -f '$(srcdir)/mapq.c' || echo '$(srcdir)/'`$(srcdir)/mapq.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-mapq.Tpo $(DEPDIR)/mpi_gsnap-mapq.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/mapq.c' object='mpi_gsnap-mapq.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-mapq.o `test -f '$(srcdir)/mapq.c' || echo '$(srcdir)/'`$(srcdir)/mapq.c
+
+mpi_gsnap-mapq.obj: $(srcdir)/mapq.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-mapq.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-mapq.Tpo -c -o mpi_gsnap-mapq.obj `if test -f '$(srcdir)/mapq.c'; then $(CYGPATH_W) '$(srcdir)/mapq.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/mapq.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-mapq.Tpo $(DEPDIR)/mpi_gsnap-mapq.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/mapq.c' object='mpi_gsnap-mapq.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-mapq.obj `if test -f '$(srcdir)/mapq.c'; then $(CYGPATH_W) '$(srcdir)/mapq.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/mapq.c'; fi`
+
+mpi_gsnap-shortread.o: $(srcdir)/shortread.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-shortread.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-shortread.Tpo -c -o mpi_gsnap-shortread.o `test -f '$(srcdir)/shortread.c' || echo '$(srcdir)/'`$(srcdir)/shortread.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-shortread.Tpo $(DEPDIR)/mpi_gsnap-shortread.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/shortread.c' object='mpi_gsnap-shortread.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-shortread.o `test -f '$(srcdir)/shortread.c' || echo '$(srcdir)/'`$(srcdir)/shortread.c
+
+mpi_gsnap-shortread.obj: $(srcdir)/shortread.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-shortread.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-shortread.Tpo -c -o mpi_gsnap-shortread.obj `if test -f '$(srcdir)/shortread.c'; then $(CYGPATH_W) '$(srcdir)/shortread.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/shortread.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-shortread.Tpo $(DEPDIR)/mpi_gsnap-shortread.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/shortread.c' object='mpi_gsnap-shortread.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-shortread.obj `if test -f '$(srcdir)/shortread.c'; then $(CYGPATH_W) '$(srcdir)/shortread.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/shortread.c'; fi`
+
+mpi_gsnap-substring.o: $(srcdir)/substring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-substring.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-substring.Tpo -c -o mpi_gsnap-substring.o `test -f '$(srcdir)/substring.c' || echo '$(srcdir)/'`$(srcdir)/substring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-substring.Tpo $(DEPDIR)/mpi_gsnap-substring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/substring.c' object='mpi_gsnap-substring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-substring.o `test -f '$(srcdir)/substring.c' || echo '$(srcdir)/'`$(srcdir)/substring.c
+
+mpi_gsnap-substring.obj: $(srcdir)/substring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-substring.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-substring.Tpo -c -o mpi_gsnap-substring.obj `if test -f '$(srcdir)/substring.c'; then $(CYGPATH_W) '$(srcdir)/substring.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/substring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-substring.Tpo $(DEPDIR)/mpi_gsnap-substring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/substring.c' object='mpi_gsnap-substring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-substring.obj `if test -f '$(srcdir)/substring.c'; then $(CYGPATH_W) '$(srcdir)/substring.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/substring.c'; fi`
+
+mpi_gsnap-junction.o: $(srcdir)/junction.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-junction.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-junction.Tpo -c -o mpi_gsnap-junction.o `test -f '$(srcdir)/junction.c' || echo '$(srcdir)/'`$(srcdir)/junction.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-junction.Tpo $(DEPDIR)/mpi_gsnap-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/junction.c' object='mpi_gsnap-junction.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-junction.o `test -f '$(srcdir)/junction.c' || echo '$(srcdir)/'`$(srcdir)/junction.c
+
+mpi_gsnap-junction.obj: $(srcdir)/junction.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-junction.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-junction.Tpo -c -o mpi_gsnap-junction.obj `if test -f '$(srcdir)/junction.c'; then $(CYGPATH_W) '$(srcdir)/junction.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/junction.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-junction.Tpo $(DEPDIR)/mpi_gsnap-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/junction.c' object='mpi_gsnap-junction.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-junction.obj `if test -f '$(srcdir)/junction.c'; then $(CYGPATH_W) '$(srcdir)/junction.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/junction.c'; fi`
+
+mpi_gsnap-stage3hr.o: $(srcdir)/stage3hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stage3hr.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-stage3hr.Tpo -c -o mpi_gsnap-stage3hr.o `test -f '$(srcdir)/stage3hr.c' || echo '$(srcdir)/'`$(srcdir)/stage3hr.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stage3hr.Tpo $(DEPDIR)/mpi_gsnap-stage3hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stage3hr.c' object='mpi_gsnap-stage3hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stage3hr.o `test -f '$(srcdir)/stage3hr.c' || echo '$(srcdir)/'`$(srcdir)/stage3hr.c
+
+mpi_gsnap-stage3hr.obj: $(srcdir)/stage3hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stage3hr.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-stage3hr.Tpo -c -o mpi_gsnap-stage3hr.obj `if test -f '$(srcdir)/stage3hr.c'; then $(CYGPATH_W) '$(srcdir)/stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stage3hr.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stage3hr.Tpo $(DEPDIR)/mpi_gsnap-stage3hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stage3hr.c' object='mpi_gsnap-stage3hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stage3hr.obj `if test -f '$(srcdir)/stage3hr.c'; then $(CYGPATH_W) '$(srcdir)/stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stage3hr.c'; fi`
+
+mpi_gsnap-spanningelt.o: $(srcdir)/spanningelt.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-spanningelt.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-spanningelt.Tpo -c -o mpi_gsnap-spanningelt.o `test -f '$(srcdir)/spanningelt.c' || echo '$(srcdir)/'`$(srcdir)/spanningelt.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-spanningelt.Tpo $(DEPDIR)/mpi_gsnap-spanningelt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/spanningelt.c' object='mpi_gsnap-spanningelt.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-spanningelt.o `test -f '$(srcdir)/spanningelt.c' || echo '$(srcdir)/'`$(srcdir)/spanningelt.c
+
+mpi_gsnap-spanningelt.obj: $(srcdir)/spanningelt.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-spanningelt.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-spanningelt.Tpo -c -o mpi_gsnap-spanningelt.obj `if test -f '$(srcdir)/spanningelt.c'; then $(CYGPATH_W) '$(srcdir)/spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/spanningelt.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-spanningelt.Tpo $(DEPDIR)/mpi_gsnap-spanningelt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/spanningelt.c' object='mpi_gsnap-spanningelt.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-spanningelt.obj `if test -f '$(srcdir)/spanningelt.c'; then $(CYGPATH_W) '$(srcdir)/spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/spanningelt.c'; fi`
+
+mpi_gsnap-cmet.o: $(srcdir)/cmet.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-cmet.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-cmet.Tpo -c -o mpi_gsnap-cmet.o `test -f '$(srcdir)/cmet.c' || echo '$(srcdir)/'`$(srcdir)/cmet.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-cmet.Tpo $(DEPDIR)/mpi_gsnap-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/cmet.c' object='mpi_gsnap-cmet.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-cmet.o `test -f '$(srcdir)/cmet.c' || echo '$(srcdir)/'`$(srcdir)/cmet.c
+
+mpi_gsnap-cmet.obj: $(srcdir)/cmet.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-cmet.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-cmet.Tpo -c -o mpi_gsnap-cmet.obj `if test -f '$(srcdir)/cmet.c'; then $(CYGPATH_W) '$(srcdir)/cmet.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/cmet.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-cmet.Tpo $(DEPDIR)/mpi_gsnap-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/cmet.c' object='mpi_gsnap-cmet.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-cmet.obj `if test -f '$(srcdir)/cmet.c'; then $(CYGPATH_W) '$(srcdir)/cmet.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/cmet.c'; fi`
+
+mpi_gsnap-atoi.o: $(srcdir)/atoi.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-atoi.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-atoi.Tpo -c -o mpi_gsnap-atoi.o `test -f '$(srcdir)/atoi.c' || echo '$(srcdir)/'`$(srcdir)/atoi.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-atoi.Tpo $(DEPDIR)/mpi_gsnap-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/atoi.c' object='mpi_gsnap-atoi.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-atoi.o `test -f '$(srcdir)/atoi.c' || echo '$(srcdir)/'`$(srcdir)/atoi.c
+
+mpi_gsnap-atoi.obj: $(srcdir)/atoi.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-atoi.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-atoi.Tpo -c -o mpi_gsnap-atoi.obj `if test -f '$(srcdir)/atoi.c'; then $(CYGPATH_W) '$(srcdir)/atoi.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/atoi.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-atoi.Tpo $(DEPDIR)/mpi_gsnap-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/atoi.c' object='mpi_gsnap-atoi.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-atoi.obj `if test -f '$(srcdir)/atoi.c'; then $(CYGPATH_W) '$(srcdir)/atoi.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/atoi.c'; fi`
+
+mpi_gsnap-maxent.o: $(srcdir)/maxent.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-maxent.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-maxent.Tpo -c -o mpi_gsnap-maxent.o `test -f '$(srcdir)/maxent.c' || echo '$(srcdir)/'`$(srcdir)/maxent.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-maxent.Tpo $(DEPDIR)/mpi_gsnap-maxent.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/maxent.c' object='mpi_gsnap-maxent.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-maxent.o `test -f '$(srcdir)/maxent.c' || echo '$(srcdir)/'`$(srcdir)/maxent.c
+
+mpi_gsnap-maxent.obj: $(srcdir)/maxent.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-maxent.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-maxent.Tpo -c -o mpi_gsnap-maxent.obj `if test -f '$(srcdir)/maxent.c'; then $(CYGPATH_W) '$(srcdir)/maxent.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/maxent.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-maxent.Tpo $(DEPDIR)/mpi_gsnap-maxent.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/maxent.c' object='mpi_gsnap-maxent.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-maxent.obj `if test -f '$(srcdir)/maxent.c'; then $(CYGPATH_W) '$(srcdir)/maxent.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/maxent.c'; fi`
+
+mpi_gsnap-pair.o: $(srcdir)/pair.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-pair.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-pair.Tpo -c -o mpi_gsnap-pair.o `test -f '$(srcdir)/pair.c' || echo '$(srcdir)/'`$(srcdir)/pair.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-pair.Tpo $(DEPDIR)/mpi_gsnap-pair.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/pair.c' object='mpi_gsnap-pair.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-pair.o `test -f '$(srcdir)/pair.c' || echo '$(srcdir)/'`$(srcdir)/pair.c
+
+mpi_gsnap-pair.obj: $(srcdir)/pair.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-pair.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-pair.Tpo -c -o mpi_gsnap-pair.obj `if test -f '$(srcdir)/pair.c'; then $(CYGPATH_W) '$(srcdir)/pair.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/pair.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-pair.Tpo $(DEPDIR)/mpi_gsnap-pair.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/pair.c' object='mpi_gsnap-pair.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-pair.obj `if test -f '$(srcdir)/pair.c'; then $(CYGPATH_W) '$(srcdir)/pair.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/pair.c'; fi`
+
+mpi_gsnap-pairpool.o: $(srcdir)/pairpool.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-pairpool.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-pairpool.Tpo -c -o mpi_gsnap-pairpool.o `test -f '$(srcdir)/pairpool.c' || echo '$(srcdir)/'`$(srcdir)/pairpool.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-pairpool.Tpo $(DEPDIR)/mpi_gsnap-pairpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/pairpool.c' object='mpi_gsnap-pairpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-pairpool.o `test -f '$(srcdir)/pairpool.c' || echo '$(srcdir)/'`$(srcdir)/pairpool.c
+
+mpi_gsnap-pairpool.obj: $(srcdir)/pairpool.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-pairpool.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-pairpool.Tpo -c -o mpi_gsnap-pairpool.obj `if test -f '$(srcdir)/pairpool.c'; then $(CYGPATH_W) '$(srcdir)/pairpool.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/pairpool.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-pairpool.Tpo $(DEPDIR)/mpi_gsnap-pairpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/pairpool.c' object='mpi_gsnap-pairpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-pairpool.obj `if test -f '$(srcdir)/pairpool.c'; then $(CYGPATH_W) '$(srcdir)/pairpool.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/pairpool.c'; fi`
+
+mpi_gsnap-diag.o: $(srcdir)/diag.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-diag.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-diag.Tpo -c -o mpi_gsnap-diag.o `test -f '$(srcdir)/diag.c' || echo '$(srcdir)/'`$(srcdir)/diag.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-diag.Tpo $(DEPDIR)/mpi_gsnap-diag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/diag.c' object='mpi_gsnap-diag.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-diag.o `test -f '$(srcdir)/diag.c' || echo '$(srcdir)/'`$(srcdir)/diag.c
+
+mpi_gsnap-diag.obj: $(srcdir)/diag.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-diag.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-diag.Tpo -c -o mpi_gsnap-diag.obj `if test -f '$(srcdir)/diag.c'; then $(CYGPATH_W) '$(srcdir)/diag.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/diag.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-diag.Tpo $(DEPDIR)/mpi_gsnap-diag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/diag.c' object='mpi_gsnap-diag.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-diag.obj `if test -f '$(srcdir)/diag.c'; then $(CYGPATH_W) '$(srcdir)/diag.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/diag.c'; fi`
+
+mpi_gsnap-diagpool.o: $(srcdir)/diagpool.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-diagpool.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-diagpool.Tpo -c -o mpi_gsnap-diagpool.o `test -f '$(srcdir)/diagpool.c' || echo '$(srcdir)/'`$(srcdir)/diagpool.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-diagpool.Tpo $(DEPDIR)/mpi_gsnap-diagpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/diagpool.c' object='mpi_gsnap-diagpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-diagpool.o `test -f '$(srcdir)/diagpool.c' || echo '$(srcdir)/'`$(srcdir)/diagpool.c
+
+mpi_gsnap-diagpool.obj: $(srcdir)/diagpool.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-diagpool.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-diagpool.Tpo -c -o mpi_gsnap-diagpool.obj `if test -f '$(srcdir)/diagpool.c'; then $(CYGPATH_W) '$(srcdir)/diagpool.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/diagpool.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-diagpool.Tpo $(DEPDIR)/mpi_gsnap-diagpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/diagpool.c' object='mpi_gsnap-diagpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-diagpool.obj `if test -f '$(srcdir)/diagpool.c'; then $(CYGPATH_W) '$(srcdir)/diagpool.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/diagpool.c'; fi`
+
+mpi_gsnap-orderstat.o: $(srcdir)/orderstat.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-orderstat.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-orderstat.Tpo -c -o mpi_gsnap-orderstat.o `test -f '$(srcdir)/orderstat.c' || echo '$(srcdir)/'`$(srcdir)/orderstat.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-orderstat.Tpo $(DEPDIR)/mpi_gsnap-orderstat.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/orderstat.c' object='mpi_gsnap-orderstat.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-orderstat.o `test -f '$(srcdir)/orderstat.c' || echo '$(srcdir)/'`$(srcdir)/orderstat.c
+
+mpi_gsnap-orderstat.obj: $(srcdir)/orderstat.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-orderstat.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-orderstat.Tpo -c -o mpi_gsnap-orderstat.obj `if test -f '$(srcdir)/orderstat.c'; then $(CYGPATH_W) '$(srcdir)/orderstat.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/orderstat.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-orderstat.Tpo $(DEPDIR)/mpi_gsnap-orderstat.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/orderstat.c' object='mpi_gsnap-orderstat.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-orderstat.obj `if test -f '$(srcdir)/orderstat.c'; then $(CYGPATH_W) '$(srcdir)/orderstat.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/orderstat.c'; fi`
+
+mpi_gsnap-oligoindex_hr.o: $(srcdir)/oligoindex_hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-oligoindex_hr.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-oligoindex_hr.Tpo -c -o mpi_gsnap-oligoindex_hr.o `test -f '$(srcdir)/oligoindex_hr.c' || echo '$(srcdir)/'`$(srcdir)/oligoindex_hr.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-oligoindex_hr.Tpo $(DEPDIR)/mpi_gsnap-oligoindex_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/oligoindex_hr.c' object='mpi_gsnap-oligoindex_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-oligoindex_hr.o `test -f '$(srcdir)/oligoindex_hr.c' || echo '$(srcdir)/'`$(srcdir)/oligoindex_hr.c
+
+mpi_gsnap-oligoindex_hr.obj: $(srcdir)/oligoindex_hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-oligoindex_hr.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-oligoindex_hr.Tpo -c -o mpi_gsnap-oligoindex_hr.obj `if test -f '$(srcdir)/oligoindex_hr.c'; then $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/oligoindex_hr.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-oligoindex_hr.Tpo $(DEPDIR)/mpi_gsnap-oligoindex_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/oligoindex_hr.c' object='mpi_gsnap-oligoindex_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-oligoindex_hr.obj `if test -f '$(srcdir)/oligoindex_hr.c'; then $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/oligoindex_hr.c'; fi`
+
+mpi_gsnap-cellpool.o: $(srcdir)/cellpool.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-cellpool.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-cellpool.Tpo -c -o mpi_gsnap-cellpool.o `test -f '$(srcdir)/cellpool.c' || echo '$(srcdir)/'`$(srcdir)/cellpool.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-cellpool.Tpo $(DEPDIR)/mpi_gsnap-cellpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/cellpool.c' object='mpi_gsnap-cellpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-cellpool.o `test -f '$(srcdir)/cellpool.c' || echo '$(srcdir)/'`$(srcdir)/cellpool.c
+
+mpi_gsnap-cellpool.obj: $(srcdir)/cellpool.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-cellpool.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-cellpool.Tpo -c -o mpi_gsnap-cellpool.obj `if test -f '$(srcdir)/cellpool.c'; then $(CYGPATH_W) '$(srcdir)/cellpool.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/cellpool.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-cellpool.Tpo $(DEPDIR)/mpi_gsnap-cellpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/cellpool.c' object='mpi_gsnap-cellpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-cellpool.obj `if test -f '$(srcdir)/cellpool.c'; then $(CYGPATH_W) '$(srcdir)/cellpool.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/cellpool.c'; fi`
+
+mpi_gsnap-stage2.o: $(srcdir)/stage2.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stage2.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-stage2.Tpo -c -o mpi_gsnap-stage2.o `test -f '$(srcdir)/stage2.c' || echo '$(srcdir)/'`$(srcdir)/stage2.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stage2.Tpo $(DEPDIR)/mpi_gsnap-stage2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stage2.c' object='mpi_gsnap-stage2.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stage2.o `test -f '$(srcdir)/stage2.c' || echo '$(srcdir)/'`$(srcdir)/stage2.c
+
+mpi_gsnap-stage2.obj: $(srcdir)/stage2.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stage2.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-stage2.Tpo -c -o mpi_gsnap-stage2.obj `if test -f '$(srcdir)/stage2.c'; then $(CYGPATH_W) '$(srcdir)/stage2.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stage2.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stage2.Tpo $(DEPDIR)/mpi_gsnap-stage2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stage2.c' object='mpi_gsnap-stage2.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stage2.obj `if test -f '$(srcdir)/stage2.c'; then $(CYGPATH_W) '$(srcdir)/stage2.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stage2.c'; fi`
+
+mpi_gsnap-intron.o: $(srcdir)/intron.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-intron.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-intron.Tpo -c -o mpi_gsnap-intron.o `test -f '$(srcdir)/intron.c' || echo '$(srcdir)/'`$(srcdir)/intron.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-intron.Tpo $(DEPDIR)/mpi_gsnap-intron.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/intron.c' object='mpi_gsnap-intron.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-intron.o `test -f '$(srcdir)/intron.c' || echo '$(srcdir)/'`$(srcdir)/intron.c
+
+mpi_gsnap-intron.obj: $(srcdir)/intron.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-intron.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-intron.Tpo -c -o mpi_gsnap-intron.obj `if test -f '$(srcdir)/intron.c'; then $(CYGPATH_W) '$(srcdir)/intron.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/intron.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-intron.Tpo $(DEPDIR)/mpi_gsnap-intron.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/intron.c' object='mpi_gsnap-intron.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-intron.obj `if test -f '$(srcdir)/intron.c'; then $(CYGPATH_W) '$(srcdir)/intron.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/intron.c'; fi`
+
+mpi_gsnap-boyer-moore.o: $(srcdir)/boyer-moore.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-boyer-moore.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-boyer-moore.Tpo -c -o mpi_gsnap-boyer-moore.o `test -f '$(srcdir)/boyer-moore.c' || echo '$(srcdir)/'`$(srcdir)/boyer-moore.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-boyer-moore.Tpo $(DEPDIR)/mpi_gsnap-boyer-moore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/boyer-moore.c' object='mpi_gsnap-boyer-moore.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-boyer-moore.o `test -f '$(srcdir)/boyer-moore.c' || echo '$(srcdir)/'`$(srcdir)/boyer-moore.c
+
+mpi_gsnap-boyer-moore.obj: $(srcdir)/boyer-moore.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-boyer-moore.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-boyer-moore.Tpo -c -o mpi_gsnap-boyer-moore.obj `if test -f '$(srcdir)/boyer-moore.c'; then $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/boyer-moore.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-boyer-moore.Tpo $(DEPDIR)/mpi_gsnap-boyer-moore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/boyer-moore.c' object='mpi_gsnap-boyer-moore.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-boyer-moore.obj `if test -f '$(srcdir)/boyer-moore.c'; then $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/boyer-moore.c'; fi`
+
+mpi_gsnap-changepoint.o: $(srcdir)/changepoint.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-changepoint.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-changepoint.Tpo -c -o mpi_gsnap-changepoint.o `test -f '$(srcdir)/changepoint.c' || echo '$(srcdir)/'`$(srcdir)/changepoint.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-changepoint.Tpo $(DEPDIR)/mpi_gsnap-changepoint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/changepoint.c' object='mpi_gsnap-changepoint.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-changepoint.o `test -f '$(srcdir)/changepoint.c' || echo '$(srcdir)/'`$(srcdir)/changepoint.c
+
+mpi_gsnap-changepoint.obj: $(srcdir)/changepoint.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-changepoint.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-changepoint.Tpo -c -o mpi_gsnap-changepoint.obj `if test -f '$(srcdir)/changepoint.c'; then $(CYGPATH_W) '$(srcdir)/changepoint.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/changepoint.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-changepoint.Tpo $(DEPDIR)/mpi_gsnap-changepoint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/changepoint.c' object='mpi_gsnap-changepoint.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-changepoint.obj `if test -f '$(srcdir)/changepoint.c'; then $(CYGPATH_W) '$(srcdir)/changepoint.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/changepoint.c'; fi`
+
+mpi_gsnap-pbinom.o: $(srcdir)/pbinom.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-pbinom.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-pbinom.Tpo -c -o mpi_gsnap-pbinom.o `test -f '$(srcdir)/pbinom.c' || echo '$(srcdir)/'`$(srcdir)/pbinom.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-pbinom.Tpo $(DEPDIR)/mpi_gsnap-pbinom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/pbinom.c' object='mpi_gsnap-pbinom.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-pbinom.o `test -f '$(srcdir)/pbinom.c' || echo '$(srcdir)/'`$(srcdir)/pbinom.c
+
+mpi_gsnap-pbinom.obj: $(srcdir)/pbinom.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-pbinom.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-pbinom.Tpo -c -o mpi_gsnap-pbinom.obj `if test -f '$(srcdir)/pbinom.c'; then $(CYGPATH_W) '$(srcdir)/pbinom.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/pbinom.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-pbinom.Tpo $(DEPDIR)/mpi_gsnap-pbinom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/pbinom.c' object='mpi_gsnap-pbinom.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-pbinom.obj `if test -f '$(srcdir)/pbinom.c'; then $(CYGPATH_W) '$(srcdir)/pbinom.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/pbinom.c'; fi`
+
+mpi_gsnap-dynprog.o: $(srcdir)/dynprog.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog.Tpo -c -o mpi_gsnap-dynprog.o `test -f '$(srcdir)/dynprog.c' || echo '$(srcdir)/'`$(srcdir)/dynprog.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog.Tpo $(DEPDIR)/mpi_gsnap-dynprog.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog.c' object='mpi_gsnap-dynprog.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog.o `test -f '$(srcdir)/dynprog.c' || echo '$(srcdir)/'`$(srcdir)/dynprog.c
+
+mpi_gsnap-dynprog.obj: $(srcdir)/dynprog.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog.Tpo -c -o mpi_gsnap-dynprog.obj `if test -f '$(srcdir)/dynprog.c'; then $(CYGPATH_W) '$(srcdir)/dynprog.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog.Tpo $(DEPDIR)/mpi_gsnap-dynprog.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog.c' object='mpi_gsnap-dynprog.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog.obj `if test -f '$(srcdir)/dynprog.c'; then $(CYGPATH_W) '$(srcdir)/dynprog.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog.c'; fi`
+
+mpi_gsnap-dynprog_simd.o: $(srcdir)/dynprog_simd.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_simd.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_simd.Tpo -c -o mpi_gsnap-dynprog_simd.o `test -f '$(srcdir)/dynprog_simd.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_simd.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_simd.Tpo $(DEPDIR)/mpi_gsnap-dynprog_simd.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_simd.c' object='mpi_gsnap-dynprog_simd.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_simd.o `test -f '$(srcdir)/dynprog_simd.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_simd.c
+
+mpi_gsnap-dynprog_simd.obj: $(srcdir)/dynprog_simd.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_simd.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_simd.Tpo -c -o mpi_gsnap-dynprog_simd.obj `if test -f '$(srcdir)/dynprog_simd.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_simd.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_simd.Tpo $(DEPDIR)/mpi_gsnap-dynprog_simd.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_simd.c' object='mpi_gsnap-dynprog_simd.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_simd.obj `if test -f '$(srcdir)/dynprog_simd.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_simd.c'; fi`
+
+mpi_gsnap-dynprog_single.o: $(srcdir)/dynprog_single.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_single.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_single.Tpo -c -o mpi_gsnap-dynprog_single.o `test -f '$(srcdir)/dynprog_single.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_single.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_single.Tpo $(DEPDIR)/mpi_gsnap-dynprog_single.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_single.c' object='mpi_gsnap-dynprog_single.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_single.o `test -f '$(srcdir)/dynprog_single.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_single.c
+
+mpi_gsnap-dynprog_single.obj: $(srcdir)/dynprog_single.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_single.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_single.Tpo -c -o mpi_gsnap-dynprog_single.obj `if test -f '$(srcdir)/dynprog_single.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_single.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_single.Tpo $(DEPDIR)/mpi_gsnap-dynprog_single.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_single.c' object='mpi_gsnap-dynprog_single.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_single.obj `if test -f '$(srcdir)/dynprog_single.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_single.c'; fi`
+
+mpi_gsnap-dynprog_genome.o: $(srcdir)/dynprog_genome.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_genome.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_genome.Tpo -c -o mpi_gsnap-dynprog_genome.o `test -f '$(srcdir)/dynprog_genome.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_genome.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_genome.Tpo $(DEPDIR)/mpi_gsnap-dynprog_genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_genome.c' object='mpi_gsnap-dynprog_genome.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_genome.o `test -f '$(srcdir)/dynprog_genome.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_genome.c
+
+mpi_gsnap-dynprog_genome.obj: $(srcdir)/dynprog_genome.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_genome.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_genome.Tpo -c -o mpi_gsnap-dynprog_genome.obj `if test -f '$(srcdir)/dynprog_genome.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_genome.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_genome.Tpo $(DEPDIR)/mpi_gsnap-dynprog_genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_genome.c' object='mpi_gsnap-dynprog_genome.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_genome.obj `if test -f '$(srcdir)/dynprog_genome.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_genome.c'; fi`
+
+mpi_gsnap-dynprog_cdna.o: $(srcdir)/dynprog_cdna.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_cdna.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_cdna.Tpo -c -o mpi_gsnap-dynprog_cdna.o `test -f '$(srcdir)/dynprog_cdna.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_cdna.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_cdna.Tpo $(DEPDIR)/mpi_gsnap-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_cdna.c' object='mpi_gsnap-dynprog_cdna.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_cdna.o `test -f '$(srcdir)/dynprog_cdna.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_cdna.c
+
+mpi_gsnap-dynprog_cdna.obj: $(srcdir)/dynprog_cdna.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_cdna.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_cdna.Tpo -c -o mpi_gsnap-dynprog_cdna.obj `if test -f '$(srcdir)/dynprog_cdna.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_cdna.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_cdna.Tpo $(DEPDIR)/mpi_gsnap-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_cdna.c' object='mpi_gsnap-dynprog_cdna.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_cdna.obj `if test -f '$(srcdir)/dynprog_cdna.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_cdna.c'; fi`
+
+mpi_gsnap-dynprog_end.o: $(srcdir)/dynprog_end.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_end.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_end.Tpo -c -o mpi_gsnap-dynprog_end.o `test -f '$(srcdir)/dynprog_end.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_end.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_end.Tpo $(DEPDIR)/mpi_gsnap-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_end.c' object='mpi_gsnap-dynprog_end.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_end.o `test -f '$(srcdir)/dynprog_end.c' || echo '$(srcdir)/'`$(srcdir)/dynprog_end.c
+
+mpi_gsnap-dynprog_end.obj: $(srcdir)/dynprog_end.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-dynprog_end.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-dynprog_end.Tpo -c -o mpi_gsnap-dynprog_end.obj `if test -f '$(srcdir)/dynprog_end.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_end.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-dynprog_end.Tpo $(DEPDIR)/mpi_gsnap-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/dynprog_end.c' object='mpi_gsnap-dynprog_end.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-dynprog_end.obj `if test -f '$(srcdir)/dynprog_end.c'; then $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/dynprog_end.c'; fi`
+
+mpi_gsnap-gbuffer.o: $(srcdir)/gbuffer.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-gbuffer.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-gbuffer.Tpo -c -o mpi_gsnap-gbuffer.o `test -f '$(srcdir)/gbuffer.c' || echo '$(srcdir)/'`$(srcdir)/gbuffer.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-gbuffer.Tpo $(DEPDIR)/mpi_gsnap-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/gbuffer.c' object='mpi_gsnap-gbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-gbuffer.o `test -f '$(srcdir)/gbuffer.c' || echo '$(srcdir)/'`$(srcdir)/gbuffer.c
+
+mpi_gsnap-gbuffer.obj: $(srcdir)/gbuffer.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-gbuffer.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-gbuffer.Tpo -c -o mpi_gsnap-gbuffer.obj `if test -f '$(srcdir)/gbuffer.c'; then $(CYGPATH_W) '$(srcdir)/gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/gbuffer.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-gbuffer.Tpo $(DEPDIR)/mpi_gsnap-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/gbuffer.c' object='mpi_gsnap-gbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-gbuffer.obj `if test -f '$(srcdir)/gbuffer.c'; then $(CYGPATH_W) '$(srcdir)/gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/gbuffer.c'; fi`
+
+mpi_gsnap-translation.o: $(srcdir)/translation.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-translation.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-translation.Tpo -c -o mpi_gsnap-translation.o `test -f '$(srcdir)/translation.c' || echo '$(srcdir)/'`$(srcdir)/translation.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-translation.Tpo $(DEPDIR)/mpi_gsnap-translation.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/translation.c' object='mpi_gsnap-translation.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-translation.o `test -f '$(srcdir)/translation.c' || echo '$(srcdir)/'`$(srcdir)/translation.c
+
+mpi_gsnap-translation.obj: $(srcdir)/translation.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-translation.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-translation.Tpo -c -o mpi_gsnap-translation.obj `if test -f '$(srcdir)/translation.c'; then $(CYGPATH_W) '$(srcdir)/translation.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/translation.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-translation.Tpo $(DEPDIR)/mpi_gsnap-translation.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/translation.c' object='mpi_gsnap-translation.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-translation.obj `if test -f '$(srcdir)/translation.c'; then $(CYGPATH_W) '$(srcdir)/translation.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/translation.c'; fi`
+
+mpi_gsnap-doublelist.o: $(srcdir)/doublelist.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-doublelist.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-doublelist.Tpo -c -o mpi_gsnap-doublelist.o `test -f '$(srcdir)/doublelist.c' || echo '$(srcdir)/'`$(srcdir)/doublelist.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-doublelist.Tpo $(DEPDIR)/mpi_gsnap-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/doublelist.c' object='mpi_gsnap-doublelist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-doublelist.o `test -f '$(srcdir)/doublelist.c' || echo '$(srcdir)/'`$(srcdir)/doublelist.c
+
+mpi_gsnap-doublelist.obj: $(srcdir)/doublelist.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-doublelist.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-doublelist.Tpo -c -o mpi_gsnap-doublelist.obj `if test -f '$(srcdir)/doublelist.c'; then $(CYGPATH_W) '$(srcdir)/doublelist.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/doublelist.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-doublelist.Tpo $(DEPDIR)/mpi_gsnap-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/doublelist.c' object='mpi_gsnap-doublelist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-doublelist.obj `if test -f '$(srcdir)/doublelist.c'; then $(CYGPATH_W) '$(srcdir)/doublelist.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/doublelist.c'; fi`
+
+mpi_gsnap-smooth.o: $(srcdir)/smooth.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-smooth.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-smooth.Tpo -c -o mpi_gsnap-smooth.o `test -f '$(srcdir)/smooth.c' || echo '$(srcdir)/'`$(srcdir)/smooth.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-smooth.Tpo $(DEPDIR)/mpi_gsnap-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/smooth.c' object='mpi_gsnap-smooth.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-smooth.o `test -f '$(srcdir)/smooth.c' || echo '$(srcdir)/'`$(srcdir)/smooth.c
+
+mpi_gsnap-smooth.obj: $(srcdir)/smooth.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-smooth.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-smooth.Tpo -c -o mpi_gsnap-smooth.obj `if test -f '$(srcdir)/smooth.c'; then $(CYGPATH_W) '$(srcdir)/smooth.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/smooth.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-smooth.Tpo $(DEPDIR)/mpi_gsnap-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/smooth.c' object='mpi_gsnap-smooth.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-smooth.obj `if test -f '$(srcdir)/smooth.c'; then $(CYGPATH_W) '$(srcdir)/smooth.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/smooth.c'; fi`
+
+mpi_gsnap-chimera.o: $(srcdir)/chimera.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-chimera.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-chimera.Tpo -c -o mpi_gsnap-chimera.o `test -f '$(srcdir)/chimera.c' || echo '$(srcdir)/'`$(srcdir)/chimera.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-chimera.Tpo $(DEPDIR)/mpi_gsnap-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/chimera.c' object='mpi_gsnap-chimera.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-chimera.o `test -f '$(srcdir)/chimera.c' || echo '$(srcdir)/'`$(srcdir)/chimera.c
+
+mpi_gsnap-chimera.obj: $(srcdir)/chimera.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-chimera.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-chimera.Tpo -c -o mpi_gsnap-chimera.obj `if test -f '$(srcdir)/chimera.c'; then $(CYGPATH_W) '$(srcdir)/chimera.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/chimera.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-chimera.Tpo $(DEPDIR)/mpi_gsnap-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/chimera.c' object='mpi_gsnap-chimera.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-chimera.obj `if test -f '$(srcdir)/chimera.c'; then $(CYGPATH_W) '$(srcdir)/chimera.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/chimera.c'; fi`
+
+mpi_gsnap-stage3.o: $(srcdir)/stage3.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stage3.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-stage3.Tpo -c -o mpi_gsnap-stage3.o `test -f '$(srcdir)/stage3.c' || echo '$(srcdir)/'`$(srcdir)/stage3.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stage3.Tpo $(DEPDIR)/mpi_gsnap-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stage3.c' object='mpi_gsnap-stage3.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stage3.o `test -f '$(srcdir)/stage3.c' || echo '$(srcdir)/'`$(srcdir)/stage3.c
+
+mpi_gsnap-stage3.obj: $(srcdir)/stage3.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stage3.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-stage3.Tpo -c -o mpi_gsnap-stage3.obj `if test -f '$(srcdir)/stage3.c'; then $(CYGPATH_W) '$(srcdir)/stage3.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stage3.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stage3.Tpo $(DEPDIR)/mpi_gsnap-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stage3.c' object='mpi_gsnap-stage3.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stage3.obj `if test -f '$(srcdir)/stage3.c'; then $(CYGPATH_W) '$(srcdir)/stage3.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stage3.c'; fi`
+
+mpi_gsnap-splicestringpool.o: $(srcdir)/splicestringpool.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-splicestringpool.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-splicestringpool.Tpo -c -o mpi_gsnap-splicestringpool.o `test -f '$(srcdir)/splicestringpool.c' || echo '$(srcdir)/'`$(srcdir)/splicestringpool.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-splicestringpool.Tpo $(DEPDIR)/mpi_gsnap-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/splicestringpool.c' object='mpi_gsnap-splicestringpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-splicestringpool.o `test -f '$(srcdir)/splicestringpool.c' || echo '$(srcdir)/'`$(srcdir)/splicestringpool.c
+
+mpi_gsnap-splicestringpool.obj: $(srcdir)/splicestringpool.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-splicestringpool.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-splicestringpool.Tpo -c -o mpi_gsnap-splicestringpool.obj `if test -f '$(srcdir)/splicestringpool.c'; then $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/splicestringpool.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-splicestringpool.Tpo $(DEPDIR)/mpi_gsnap-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/splicestringpool.c' object='mpi_gsnap-splicestringpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-splicestringpool.obj `if test -f '$(srcdir)/splicestringpool.c'; then $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/splicestringpool.c'; fi`
+
+mpi_gsnap-splicetrie_build.o: $(srcdir)/splicetrie_build.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-splicetrie_build.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-splicetrie_build.Tpo -c -o mpi_gsnap-splicetrie_build.o `test -f '$(srcdir)/splicetrie_build.c' || echo '$(srcdir)/'`$(srcdir)/splicetrie_build.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-splicetrie_build.Tpo $(DEPDIR)/mpi_gsnap-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/splicetrie_build.c' object='mpi_gsnap-splicetrie_build.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-splicetrie_build.o `test -f '$(srcdir)/splicetrie_build.c' || echo '$(srcdir)/'`$(srcdir)/splicetrie_build.c
+
+mpi_gsnap-splicetrie_build.obj: $(srcdir)/splicetrie_build.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-splicetrie_build.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-splicetrie_build.Tpo -c -o mpi_gsnap-splicetrie_build.obj `if test -f '$(srcdir)/splicetrie_build.c'; then $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/splicetrie_build.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-splicetrie_build.Tpo $(DEPDIR)/mpi_gsnap-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/splicetrie_build.c' object='mpi_gsnap-splicetrie_build.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-splicetrie_build.obj `if test -f '$(srcdir)/splicetrie_build.c'; then $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/splicetrie_build.c'; fi`
+
+mpi_gsnap-splicetrie.o: $(srcdir)/splicetrie.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-splicetrie.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-splicetrie.Tpo -c -o mpi_gsnap-splicetrie.o `test -f '$(srcdir)/splicetrie.c' || echo '$(srcdir)/'`$(srcdir)/splicetrie.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-splicetrie.Tpo $(DEPDIR)/mpi_gsnap-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/splicetrie.c' object='mpi_gsnap-splicetrie.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-splicetrie.o `test -f '$(srcdir)/splicetrie.c' || echo '$(srcdir)/'`$(srcdir)/splicetrie.c
+
+mpi_gsnap-splicetrie.obj: $(srcdir)/splicetrie.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-splicetrie.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-splicetrie.Tpo -c -o mpi_gsnap-splicetrie.obj `if test -f '$(srcdir)/splicetrie.c'; then $(CYGPATH_W) '$(srcdir)/splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/splicetrie.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-splicetrie.Tpo $(DEPDIR)/mpi_gsnap-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/splicetrie.c' object='mpi_gsnap-splicetrie.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-splicetrie.obj `if test -f '$(srcdir)/splicetrie.c'; then $(CYGPATH_W) '$(srcdir)/splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/splicetrie.c'; fi`
+
+mpi_gsnap-splice.o: $(srcdir)/splice.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-splice.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-splice.Tpo -c -o mpi_gsnap-splice.o `test -f '$(srcdir)/splice.c' || echo '$(srcdir)/'`$(srcdir)/splice.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-splice.Tpo $(DEPDIR)/mpi_gsnap-splice.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/splice.c' object='mpi_gsnap-splice.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-splice.o `test -f '$(srcdir)/splice.c' || echo '$(srcdir)/'`$(srcdir)/splice.c
+
+mpi_gsnap-splice.obj: $(srcdir)/splice.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-splice.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-splice.Tpo -c -o mpi_gsnap-splice.obj `if test -f '$(srcdir)/splice.c'; then $(CYGPATH_W) '$(srcdir)/splice.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/splice.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-splice.Tpo $(DEPDIR)/mpi_gsnap-splice.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/splice.c' object='mpi_gsnap-splice.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-splice.obj `if test -f '$(srcdir)/splice.c'; then $(CYGPATH_W) '$(srcdir)/splice.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/splice.c'; fi`
+
+mpi_gsnap-indel.o: $(srcdir)/indel.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-indel.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-indel.Tpo -c -o mpi_gsnap-indel.o `test -f '$(srcdir)/indel.c' || echo '$(srcdir)/'`$(srcdir)/indel.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-indel.Tpo $(DEPDIR)/mpi_gsnap-indel.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/indel.c' object='mpi_gsnap-indel.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-indel.o `test -f '$(srcdir)/indel.c' || echo '$(srcdir)/'`$(srcdir)/indel.c
+
+mpi_gsnap-indel.obj: $(srcdir)/indel.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-indel.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-indel.Tpo -c -o mpi_gsnap-indel.obj `if test -f '$(srcdir)/indel.c'; then $(CYGPATH_W) '$(srcdir)/indel.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/indel.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-indel.Tpo $(DEPDIR)/mpi_gsnap-indel.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/indel.c' object='mpi_gsnap-indel.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-indel.obj `if test -f '$(srcdir)/indel.c'; then $(CYGPATH_W) '$(srcdir)/indel.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/indel.c'; fi`
+
+mpi_gsnap-bitpack64-access.o: $(srcdir)/bitpack64-access.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bitpack64-access.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-bitpack64-access.Tpo -c -o mpi_gsnap-bitpack64-access.o `test -f '$(srcdir)/bitpack64-access.c' || echo '$(srcdir)/'`$(srcdir)/bitpack64-access.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bitpack64-access.Tpo $(DEPDIR)/mpi_gsnap-bitpack64-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bitpack64-access.c' object='mpi_gsnap-bitpack64-access.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bitpack64-access.o `test -f '$(srcdir)/bitpack64-access.c' || echo '$(srcdir)/'`$(srcdir)/bitpack64-access.c
+
+mpi_gsnap-bitpack64-access.obj: $(srcdir)/bitpack64-access.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bitpack64-access.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-bitpack64-access.Tpo -c -o mpi_gsnap-bitpack64-access.obj `if test -f '$(srcdir)/bitpack64-access.c'; then $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bitpack64-access.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bitpack64-access.Tpo $(DEPDIR)/mpi_gsnap-bitpack64-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bitpack64-access.c' object='mpi_gsnap-bitpack64-access.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bitpack64-access.obj `if test -f '$(srcdir)/bitpack64-access.c'; then $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bitpack64-access.c'; fi`
+
+mpi_gsnap-bytecoding.o: $(srcdir)/bytecoding.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bytecoding.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-bytecoding.Tpo -c -o mpi_gsnap-bytecoding.o `test -f '$(srcdir)/bytecoding.c' || echo '$(srcdir)/'`$(srcdir)/bytecoding.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bytecoding.Tpo $(DEPDIR)/mpi_gsnap-bytecoding.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bytecoding.c' object='mpi_gsnap-bytecoding.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bytecoding.o `test -f '$(srcdir)/bytecoding.c' || echo '$(srcdir)/'`$(srcdir)/bytecoding.c
+
+mpi_gsnap-bytecoding.obj: $(srcdir)/bytecoding.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-bytecoding.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-bytecoding.Tpo -c -o mpi_gsnap-bytecoding.obj `if test -f '$(srcdir)/bytecoding.c'; then $(CYGPATH_W) '$(srcdir)/bytecoding.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bytecoding.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-bytecoding.Tpo $(DEPDIR)/mpi_gsnap-bytecoding.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/bytecoding.c' object='mpi_gsnap-bytecoding.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-bytecoding.obj `if test -f '$(srcdir)/bytecoding.c'; then $(CYGPATH_W) '$(srcdir)/bytecoding.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/bytecoding.c'; fi`
+
+mpi_gsnap-univdiag.o: $(srcdir)/univdiag.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-univdiag.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-univdiag.Tpo -c -o mpi_gsnap-univdiag.o `test -f '$(srcdir)/univdiag.c' || echo '$(srcdir)/'`$(srcdir)/univdiag.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-univdiag.Tpo $(DEPDIR)/mpi_gsnap-univdiag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/univdiag.c' object='mpi_gsnap-univdiag.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-univdiag.o `test -f '$(srcdir)/univdiag.c' || echo '$(srcdir)/'`$(srcdir)/univdiag.c
+
+mpi_gsnap-univdiag.obj: $(srcdir)/univdiag.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-univdiag.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-univdiag.Tpo -c -o mpi_gsnap-univdiag.obj `if test -f '$(srcdir)/univdiag.c'; then $(CYGPATH_W) '$(srcdir)/univdiag.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/univdiag.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-univdiag.Tpo $(DEPDIR)/mpi_gsnap-univdiag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/univdiag.c' object='mpi_gsnap-univdiag.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-univdiag.obj `if test -f '$(srcdir)/univdiag.c'; then $(CYGPATH_W) '$(srcdir)/univdiag.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/univdiag.c'; fi`
+
+mpi_gsnap-sarray-read.o: $(srcdir)/sarray-read.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-sarray-read.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-sarray-read.Tpo -c -o mpi_gsnap-sarray-read.o `test -f '$(srcdir)/sarray-read.c' || echo '$(srcdir)/'`$(srcdir)/sarray-read.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-sarray-read.Tpo $(DEPDIR)/mpi_gsnap-sarray-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/sarray-read.c' object='mpi_gsnap-sarray-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-sarray-read.o `test -f '$(srcdir)/sarray-read.c' || echo '$(srcdir)/'`$(srcdir)/sarray-read.c
+
+mpi_gsnap-sarray-read.obj: $(srcdir)/sarray-read.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-sarray-read.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-sarray-read.Tpo -c -o mpi_gsnap-sarray-read.obj `if test -f '$(srcdir)/sarray-read.c'; then $(CYGPATH_W) '$(srcdir)/sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/sarray-read.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-sarray-read.Tpo $(DEPDIR)/mpi_gsnap-sarray-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/sarray-read.c' object='mpi_gsnap-sarray-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-sarray-read.obj `if test -f '$(srcdir)/sarray-read.c'; then $(CYGPATH_W) '$(srcdir)/sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/sarray-read.c'; fi`
+
+mpi_gsnap-stage1hr.o: $(srcdir)/stage1hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stage1hr.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-stage1hr.Tpo -c -o mpi_gsnap-stage1hr.o `test -f '$(srcdir)/stage1hr.c' || echo '$(srcdir)/'`$(srcdir)/stage1hr.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stage1hr.Tpo $(DEPDIR)/mpi_gsnap-stage1hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stage1hr.c' object='mpi_gsnap-stage1hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stage1hr.o `test -f '$(srcdir)/stage1hr.c' || echo '$(srcdir)/'`$(srcdir)/stage1hr.c
+
+mpi_gsnap-stage1hr.obj: $(srcdir)/stage1hr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-stage1hr.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-stage1hr.Tpo -c -o mpi_gsnap-stage1hr.obj `if test -f '$(srcdir)/stage1hr.c'; then $(CYGPATH_W) '$(srcdir)/stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stage1hr.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-stage1hr.Tpo $(DEPDIR)/mpi_gsnap-stage1hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/stage1hr.c' object='mpi_gsnap-stage1hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-stage1hr.obj `if test -f '$(srcdir)/stage1hr.c'; then $(CYGPATH_W) '$(srcdir)/stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/stage1hr.c'; fi`
+
+mpi_gsnap-request.o: $(srcdir)/request.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-request.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-request.Tpo -c -o mpi_gsnap-request.o `test -f '$(srcdir)/request.c' || echo '$(srcdir)/'`$(srcdir)/request.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-request.Tpo $(DEPDIR)/mpi_gsnap-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/request.c' object='mpi_gsnap-request.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-request.o `test -f '$(srcdir)/request.c' || echo '$(srcdir)/'`$(srcdir)/request.c
+
+mpi_gsnap-request.obj: $(srcdir)/request.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-request.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-request.Tpo -c -o mpi_gsnap-request.obj `if test -f '$(srcdir)/request.c'; then $(CYGPATH_W) '$(srcdir)/request.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/request.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-request.Tpo $(DEPDIR)/mpi_gsnap-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/request.c' object='mpi_gsnap-request.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-request.obj `if test -f '$(srcdir)/request.c'; then $(CYGPATH_W) '$(srcdir)/request.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/request.c'; fi`
+
+mpi_gsnap-resulthr.o: $(srcdir)/resulthr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-resulthr.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-resulthr.Tpo -c -o mpi_gsnap-resulthr.o `test -f '$(srcdir)/resulthr.c' || echo '$(srcdir)/'`$(srcdir)/resulthr.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-resulthr.Tpo $(DEPDIR)/mpi_gsnap-resulthr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/resulthr.c' object='mpi_gsnap-resulthr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-resulthr.o `test -f '$(srcdir)/resulthr.c' || echo '$(srcdir)/'`$(srcdir)/resulthr.c
+
+mpi_gsnap-resulthr.obj: $(srcdir)/resulthr.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-resulthr.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-resulthr.Tpo -c -o mpi_gsnap-resulthr.obj `if test -f '$(srcdir)/resulthr.c'; then $(CYGPATH_W) '$(srcdir)/resulthr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/resulthr.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-resulthr.Tpo $(DEPDIR)/mpi_gsnap-resulthr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/resulthr.c' object='mpi_gsnap-resulthr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-resulthr.obj `if test -f '$(srcdir)/resulthr.c'; then $(CYGPATH_W) '$(srcdir)/resulthr.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/resulthr.c'; fi`
+
+mpi_gsnap-output.o: $(srcdir)/output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-output.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-output.Tpo -c -o mpi_gsnap-output.o `test -f '$(srcdir)/output.c' || echo '$(srcdir)/'`$(srcdir)/output.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-output.Tpo $(DEPDIR)/mpi_gsnap-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/output.c' object='mpi_gsnap-output.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-output.o `test -f '$(srcdir)/output.c' || echo '$(srcdir)/'`$(srcdir)/output.c
+
+mpi_gsnap-output.obj: $(srcdir)/output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-output.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-output.Tpo -c -o mpi_gsnap-output.obj `if test -f '$(srcdir)/output.c'; then $(CYGPATH_W) '$(srcdir)/output.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/output.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-output.Tpo $(DEPDIR)/mpi_gsnap-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/output.c' object='mpi_gsnap-output.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-output.obj `if test -f '$(srcdir)/output.c'; then $(CYGPATH_W) '$(srcdir)/output.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/output.c'; fi`
+
+mpi_gsnap-master.o: $(srcdir)/master.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-master.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-master.Tpo -c -o mpi_gsnap-master.o `test -f '$(srcdir)/master.c' || echo '$(srcdir)/'`$(srcdir)/master.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-master.Tpo $(DEPDIR)/mpi_gsnap-master.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/master.c' object='mpi_gsnap-master.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-master.o `test -f '$(srcdir)/master.c' || echo '$(srcdir)/'`$(srcdir)/master.c
+
+mpi_gsnap-master.obj: $(srcdir)/master.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-master.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-master.Tpo -c -o mpi_gsnap-master.obj `if test -f '$(srcdir)/master.c'; then $(CYGPATH_W) '$(srcdir)/master.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/master.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-master.Tpo $(DEPDIR)/mpi_gsnap-master.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/master.c' object='mpi_gsnap-master.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-master.obj `if test -f '$(srcdir)/master.c'; then $(CYGPATH_W) '$(srcdir)/master.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/master.c'; fi`
+
+mpi_gsnap-inbuffer.o: $(srcdir)/inbuffer.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-inbuffer.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-inbuffer.Tpo -c -o mpi_gsnap-inbuffer.o `test -f '$(srcdir)/inbuffer.c' || echo '$(srcdir)/'`$(srcdir)/inbuffer.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-inbuffer.Tpo $(DEPDIR)/mpi_gsnap-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/inbuffer.c' object='mpi_gsnap-inbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-inbuffer.o `test -f '$(srcdir)/inbuffer.c' || echo '$(srcdir)/'`$(srcdir)/inbuffer.c
+
+mpi_gsnap-inbuffer.obj: $(srcdir)/inbuffer.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-inbuffer.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-inbuffer.Tpo -c -o mpi_gsnap-inbuffer.obj `if test -f '$(srcdir)/inbuffer.c'; then $(CYGPATH_W) '$(srcdir)/inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/inbuffer.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-inbuffer.Tpo $(DEPDIR)/mpi_gsnap-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/inbuffer.c' object='mpi_gsnap-inbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-inbuffer.obj `if test -f '$(srcdir)/inbuffer.c'; then $(CYGPATH_W) '$(srcdir)/inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/inbuffer.c'; fi`
+
+mpi_gsnap-samheader.o: $(srcdir)/samheader.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-samheader.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-samheader.Tpo -c -o mpi_gsnap-samheader.o `test -f '$(srcdir)/samheader.c' || echo '$(srcdir)/'`$(srcdir)/samheader.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-samheader.Tpo $(DEPDIR)/mpi_gsnap-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/samheader.c' object='mpi_gsnap-samheader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-samheader.o `test -f '$(srcdir)/samheader.c' || echo '$(srcdir)/'`$(srcdir)/samheader.c
+
+mpi_gsnap-samheader.obj: $(srcdir)/samheader.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-samheader.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-samheader.Tpo -c -o mpi_gsnap-samheader.obj `if test -f '$(srcdir)/samheader.c'; then $(CYGPATH_W) '$(srcdir)/samheader.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/samheader.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-samheader.Tpo $(DEPDIR)/mpi_gsnap-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/samheader.c' object='mpi_gsnap-samheader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-samheader.obj `if test -f '$(srcdir)/samheader.c'; then $(CYGPATH_W) '$(srcdir)/samheader.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/samheader.c'; fi`
+
+mpi_gsnap-outbuffer.o: $(srcdir)/outbuffer.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-outbuffer.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-outbuffer.Tpo -c -o mpi_gsnap-outbuffer.o `test -f '$(srcdir)/outbuffer.c' || echo '$(srcdir)/'`$(srcdir)/outbuffer.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-outbuffer.Tpo $(DEPDIR)/mpi_gsnap-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/outbuffer.c' object='mpi_gsnap-outbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-outbuffer.o `test -f '$(srcdir)/outbuffer.c' || echo '$(srcdir)/'`$(srcdir)/outbuffer.c
+
+mpi_gsnap-outbuffer.obj: $(srcdir)/outbuffer.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-outbuffer.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-outbuffer.Tpo -c -o mpi_gsnap-outbuffer.obj `if test -f '$(srcdir)/outbuffer.c'; then $(CYGPATH_W) '$(srcdir)/outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/outbuffer.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-outbuffer.Tpo $(DEPDIR)/mpi_gsnap-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/outbuffer.c' object='mpi_gsnap-outbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-outbuffer.obj `if test -f '$(srcdir)/outbuffer.c'; then $(CYGPATH_W) '$(srcdir)/outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/outbuffer.c'; fi`
+
+mpi_gsnap-datadir.o: $(srcdir)/datadir.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-datadir.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-datadir.Tpo -c -o mpi_gsnap-datadir.o `test -f '$(srcdir)/datadir.c' || echo '$(srcdir)/'`$(srcdir)/datadir.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-datadir.Tpo $(DEPDIR)/mpi_gsnap-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/datadir.c' object='mpi_gsnap-datadir.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-datadir.o `test -f '$(srcdir)/datadir.c' || echo '$(srcdir)/'`$(srcdir)/datadir.c
+
+mpi_gsnap-datadir.obj: $(srcdir)/datadir.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-datadir.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-datadir.Tpo -c -o mpi_gsnap-datadir.obj `if test -f '$(srcdir)/datadir.c'; then $(CYGPATH_W) '$(srcdir)/datadir.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/datadir.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-datadir.Tpo $(DEPDIR)/mpi_gsnap-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/datadir.c' object='mpi_gsnap-datadir.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-datadir.obj `if test -f '$(srcdir)/datadir.c'; then $(CYGPATH_W) '$(srcdir)/datadir.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/datadir.c'; fi`
+
+mpi_gsnap-getopt.o: $(srcdir)/getopt.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-getopt.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-getopt.Tpo -c -o mpi_gsnap-getopt.o `test -f '$(srcdir)/getopt.c' || echo '$(srcdir)/'`$(srcdir)/getopt.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-getopt.Tpo $(DEPDIR)/mpi_gsnap-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/getopt.c' object='mpi_gsnap-getopt.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-getopt.o `test -f '$(srcdir)/getopt.c' || echo '$(srcdir)/'`$(srcdir)/getopt.c
+
+mpi_gsnap-getopt.obj: $(srcdir)/getopt.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-getopt.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-getopt.Tpo -c -o mpi_gsnap-getopt.obj `if test -f '$(srcdir)/getopt.c'; then $(CYGPATH_W) '$(srcdir)/getopt.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/getopt.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-getopt.Tpo $(DEPDIR)/mpi_gsnap-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/getopt.c' object='mpi_gsnap-getopt.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-getopt.obj `if test -f '$(srcdir)/getopt.c'; then $(CYGPATH_W) '$(srcdir)/getopt.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/getopt.c'; fi`
+
+mpi_gsnap-getopt1.o: $(srcdir)/getopt1.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-getopt1.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-getopt1.Tpo -c -o mpi_gsnap-getopt1.o `test -f '$(srcdir)/getopt1.c' || echo '$(srcdir)/'`$(srcdir)/getopt1.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-getopt1.Tpo $(DEPDIR)/mpi_gsnap-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/getopt1.c' object='mpi_gsnap-getopt1.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-getopt1.o `test -f '$(srcdir)/getopt1.c' || echo '$(srcdir)/'`$(srcdir)/getopt1.c
+
+mpi_gsnap-getopt1.obj: $(srcdir)/getopt1.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-getopt1.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-getopt1.Tpo -c -o mpi_gsnap-getopt1.obj `if test -f '$(srcdir)/getopt1.c'; then $(CYGPATH_W) '$(srcdir)/getopt1.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/getopt1.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-getopt1.Tpo $(DEPDIR)/mpi_gsnap-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/getopt1.c' object='mpi_gsnap-getopt1.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-getopt1.obj `if test -f '$(srcdir)/getopt1.c'; then $(CYGPATH_W) '$(srcdir)/getopt1.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/getopt1.c'; fi`
+
+mpi_gsnap-gsnap.o: $(srcdir)/gsnap.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-gsnap.o -MD -MP -MF $(DEPDIR)/mpi_gsnap-gsnap.Tpo -c -o mpi_gsnap-gsnap.o `test -f '$(srcdir)/gsnap.c' || echo '$(srcdir)/'`$(srcdir)/gsnap.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-gsnap.Tpo $(DEPDIR)/mpi_gsnap-gsnap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/gsnap.c' object='mpi_gsnap-gsnap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-gsnap.o `test -f '$(srcdir)/gsnap.c' || echo '$(srcdir)/'`$(srcdir)/gsnap.c
+
+mpi_gsnap-gsnap.obj: $(srcdir)/gsnap.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -MT mpi_gsnap-gsnap.obj -MD -MP -MF $(DEPDIR)/mpi_gsnap-gsnap.Tpo -c -o mpi_gsnap-gsnap.obj `if test -f '$(srcdir)/gsnap.c'; then $(CYGPATH_W) '$(srcdir)/gsnap.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/gsnap.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/mpi_gsnap-gsnap.Tpo $(DEPDIR)/mpi_gsnap-gsnap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/gsnap.c' object='mpi_gsnap-gsnap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mpi_gsnap_CFLAGS) $(CFLAGS) -c -o mpi_gsnap-gsnap.obj `if test -f '$(srcdir)/gsnap.c'; then $(CYGPATH_W) '$(srcdir)/gsnap.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/gsnap.c'; fi`
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+ for dir in "$(DESTDIR)$(bindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+ clean-generic clean-libtool ctags distclean distclean-compile \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-binPROGRAMS install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am \
+ uninstall-binPROGRAMS
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/ChangeLog b/src/ChangeLog
new file mode 100644
index 0000000..e69de29
diff --git a/src/Makefile.am b/src/Makefile.am
index 83264f5..88b6c45 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,6 +1,8 @@
# Copied from ../util so "gmap_setup -B src" works during "make check"
CLEANFILES = fa_coords gmap_process
+# Include master.c and master.h, so they get included in distribution
+EXTRA_DIST = mpidebug.c mpidebug.h master.c master.h
# -DUTILITYP=1 needed for iit_store, iit_dump, iit_get, gmapindex, and
# get-genome so they can handle both small and large genomes at run
@@ -17,6 +19,7 @@ GMAP_FILES = fopen.h bool.h types.h separator.h comp.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h compress-write.c compress-write.h \
@@ -42,7 +45,7 @@ GMAP_FILES = fopen.h bool.h types.h separator.h comp.h \
dynprog_single.c dynprog_single.h dynprog_genome.c dynprog_genome.h dynprog_cdna.c dynprog_cdna.h dynprog_end.c dynprog_end.h \
translation.c translation.h \
pbinom.c pbinom.h changepoint.c changepoint.h sense.h fastlog.h stage3.c stage3.h \
- request.c request.h result.c result.h \
+ request.c request.h result.c result.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
chimera.c chimera.h datadir.c datadir.h \
getopt.c getopt1.c getopt.h gmap.c
@@ -64,6 +67,7 @@ GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h compress-write.c compress-write.h \
@@ -89,7 +93,7 @@ GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
dynprog_single.c dynprog_single.h dynprog_genome.c dynprog_genome.h dynprog_cdna.c dynprog_cdna.h dynprog_end.c dynprog_end.h \
translation.c translation.h \
pbinom.c pbinom.h changepoint.c changepoint.h sense.h fastlog.h stage3.c stage3.h \
- request.c request.h result.c result.h \
+ request.c request.h result.c result.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
chimera.c chimera.h datadir.c datadir.h \
getopt.c getopt1.c getopt.h gmap.c
@@ -113,6 +117,7 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
univinterval.c univinterval.h interval.c interval.h \
uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h \
@@ -124,8 +129,7 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
chrom.c chrom.h segmentpos.c segmentpos.h \
chrnum.c chrnum.h \
maxent_hr.c maxent_hr.h samflags.h samprint.c samprint.h \
- mapq.c mapq.h shortread.c shortread.h substring.c substring.h stage3hr.c stage3hr.h \
- goby.c goby.h \
+ mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
@@ -137,20 +141,21 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
- bytecoding.c bytecoding.h sarray-read.c sarray-read.h \
+ bytecoding.c bytecoding.h univdiagdef.h univdiag.c univdiag.h sarray-read.c sarray-read.h \
stage1hr.c stage1hr.h \
- request.c request.h resulthr.c resulthr.h \
+ request.c request.h resulthr.c resulthr.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
datadir.c datadir.h mode.h \
getopt.c getopt1.c getopt.h gsnap.c
# Note: dist_ commands get read by bootstrap, and don't follow the flags
+# -lrt is needed for shm_open
gsnap_CC = $(PTHREAD_CC)
-gsnap_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) $(GOBY_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1
-gsnap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) $(GOBY_LDFLAGS)
-gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) $(GOBY_LIBS)
+gsnap_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1
+gsnap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) -lrt
dist_gsnap_SOURCES = $(GSNAP_FILES)
@@ -162,6 +167,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
univinterval.c univinterval.h interval.c interval.h \
uintlist.c uintlist.h uint8list.c uint8list.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h \
@@ -173,8 +179,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
chrom.c chrom.h segmentpos.c segmentpos.h \
chrnum.c chrnum.h \
maxent_hr.c maxent_hr.h samflags.h samprint.c samprint.h \
- mapq.c mapq.h shortread.c shortread.h substring.c substring.h stage3hr.c stage3hr.h \
- goby.c goby.h \
+ mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
@@ -187,7 +192,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
stage1hr.c stage1hr.h \
- request.c request.h resulthr.c resulthr.h \
+ request.c request.h resulthr.c resulthr.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
datadir.c datadir.h mode.h \
getopt.c getopt1.c getopt.h gsnap.c
@@ -196,9 +201,9 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
# Note: dist_ commands get read by bootstrap, and don't follow the flags
gsnapl_CC = $(PTHREAD_CC)
-gsnapl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) $(GOBY_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1
-gsnapl_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) $(GOBY_LDFLAGS)
-gsnapl_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) $(GOBY_LIBS)
+gsnapl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1
+gsnapl_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gsnapl_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_SOURCES = $(GSNAPL_FILES)
@@ -209,6 +214,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h \
@@ -220,7 +226,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
chrom.c chrom.h segmentpos.c segmentpos.h \
chrnum.c chrnum.h \
maxent_hr.c maxent_hr.h \
- mapq.c mapq.h shortread.c shortread.h substring.c substring.h stage3hr.c stage3hr.h \
+ mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
@@ -232,7 +238,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
- bytecoding.c bytecoding.h sarray-read.c sarray-read.h \
+ bytecoding.c bytecoding.h univdiagdef.h univdiag.c univdiag.h sarray-read.c sarray-read.h \
stage1hr.c stage1hr.h resulthr.c resulthr.h \
datadir.c datadir.h mode.h \
getopt.c getopt1.c getopt.h uniqscan.c
@@ -251,6 +257,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h uint8list.c uint8list.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h \
@@ -262,7 +269,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
chrom.c chrom.h segmentpos.c segmentpos.h \
chrnum.c chrnum.h \
maxent_hr.c maxent_hr.h \
- mapq.c mapq.h shortread.c shortread.h substring.c substring.h stage3hr.c stage3hr.h \
+ mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
@@ -292,6 +299,7 @@ GMAPINDEX_FILES = fopen.h bool.h types.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iit-write-univ.c iit-write-univ.h \
iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h genome.c genome.h \
@@ -318,6 +326,7 @@ SNPINDEX_FILES = fopen.h bool.h types.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
genomicpos.c genomicpos.h compress.c compress.h compress-write.c compress-write.h \
popcount.c popcount.h genome128_hr.c genome128_hr.h \
@@ -341,6 +350,7 @@ CMETINDEX_FILES = fopen.h bool.h types.h \
genomicpos.c genomicpos.h \
stopwatch.c stopwatch.h access.c access.h \
univinterval.c univinterval.h interval.h interval.c \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.h iit-read.c \
complement.h compress.c compress.h compress-write.c compress-write.h \
popcount.c popcount.h genome128_hr.c genome128_hr.h \
@@ -366,6 +376,7 @@ ATOIINDEX_FILES = fopen.h bool.h types.h \
genomicpos.c genomicpos.h \
stopwatch.c stopwatch.h access.c access.h \
univinterval.c univinterval.h interval.h interval.c \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.h iit-read.c \
complement.h compress.c compress.h compress-write.c compress-write.h \
popcount.c popcount.h genome128_hr.c genome128_hr.h \
@@ -391,6 +402,7 @@ GET_GENOME_FILES = fopen.h bool.h types.h separator.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
genome.c genome.h \
@@ -432,6 +444,7 @@ IIT_GET_FILES = fopen.h bool.h types.h \
univinterval.c univinterval.h interval.c interval.h \
uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
complement.h parserange.c parserange.h getopt.c getopt1.c getopt.h iit_get.c
@@ -447,6 +460,7 @@ IIT_DUMP_FILES = fopen.h bool.h types.h \
intlistdef.h intlist.c intlist.h list.c list.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
getopt.c getopt1.c getopt.h iit_dump.c
@@ -465,6 +479,7 @@ SAM_SORT_FILES = bool.h types.h \
stopwatch.c stopwatch.h access.c access.h \
univinterval.c univinterval.h interval.c interval.h \
uintlist.c uintlist.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
samflags.h samheader.c samheader.h samread.c samread.h \
datadir.c datadir.h \
diff --git a/src/Makefile.in b/src/Makefile.in
index 7e4b800..55726b0 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -42,7 +42,7 @@ bin_PROGRAMS = gmap$(EXEEXT) gmapl$(EXEEXT) get-genome$(EXEEXT) \
cmetindex$(EXEEXT) atoiindex$(EXEEXT) sam_sort$(EXEEXT)
subdir = src
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
- $(srcdir)/config.h.in
+ $(srcdir)/config.h.in ChangeLog
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/ltoptions.m4 \
@@ -78,6 +78,7 @@ am__objects_1 = atoiindex-except.$(OBJEXT) atoiindex-assert.$(OBJEXT) \
atoiindex-bigendian.$(OBJEXT) atoiindex-genomicpos.$(OBJEXT) \
atoiindex-stopwatch.$(OBJEXT) atoiindex-access.$(OBJEXT) \
atoiindex-univinterval.$(OBJEXT) atoiindex-interval.$(OBJEXT) \
+ atoiindex-filestring.$(OBJEXT) \
atoiindex-iit-read-univ.$(OBJEXT) atoiindex-iit-read.$(OBJEXT) \
atoiindex-compress.$(OBJEXT) \
atoiindex-compress-write.$(OBJEXT) \
@@ -107,6 +108,7 @@ am__objects_2 = cmetindex-except.$(OBJEXT) cmetindex-assert.$(OBJEXT) \
cmetindex-bigendian.$(OBJEXT) cmetindex-genomicpos.$(OBJEXT) \
cmetindex-stopwatch.$(OBJEXT) cmetindex-access.$(OBJEXT) \
cmetindex-univinterval.$(OBJEXT) cmetindex-interval.$(OBJEXT) \
+ cmetindex-filestring.$(OBJEXT) \
cmetindex-iit-read-univ.$(OBJEXT) cmetindex-iit-read.$(OBJEXT) \
cmetindex-compress.$(OBJEXT) \
cmetindex-compress-write.$(OBJEXT) \
@@ -138,6 +140,7 @@ am__objects_3 = get_genome-except.$(OBJEXT) \
get_genome-univinterval.$(OBJEXT) \
get_genome-interval.$(OBJEXT) get_genome-uintlist.$(OBJEXT) \
get_genome-stopwatch.$(OBJEXT) get_genome-access.$(OBJEXT) \
+ get_genome-filestring.$(OBJEXT) \
get_genome-iit-read-univ.$(OBJEXT) \
get_genome-iit-read.$(OBJEXT) get_genome-md5.$(OBJEXT) \
get_genome-bzip2.$(OBJEXT) get_genome-sequence.$(OBJEXT) \
@@ -158,17 +161,18 @@ am__objects_4 = gmap-except.$(OBJEXT) gmap-assert.$(OBJEXT) \
gmap-littleendian.$(OBJEXT) gmap-bigendian.$(OBJEXT) \
gmap-univinterval.$(OBJEXT) gmap-interval.$(OBJEXT) \
gmap-uintlist.$(OBJEXT) gmap-stopwatch.$(OBJEXT) \
- gmap-access.$(OBJEXT) gmap-iit-read-univ.$(OBJEXT) \
- gmap-iit-read.$(OBJEXT) gmap-md5.$(OBJEXT) \
- gmap-bzip2.$(OBJEXT) gmap-sequence.$(OBJEXT) \
- gmap-reader.$(OBJEXT) gmap-genomicpos.$(OBJEXT) \
- gmap-compress.$(OBJEXT) gmap-compress-write.$(OBJEXT) \
- gmap-gbuffer.$(OBJEXT) gmap-genome.$(OBJEXT) \
- gmap-popcount.$(OBJEXT) gmap-genome128_hr.$(OBJEXT) \
- gmap-genome_sites.$(OBJEXT) gmap-genome-write.$(OBJEXT) \
- gmap-bitpack64-read.$(OBJEXT) gmap-bitpack64-readtwo.$(OBJEXT) \
- gmap-indexdb.$(OBJEXT) gmap-indexdb_hr.$(OBJEXT) \
- gmap-oligo.$(OBJEXT) gmap-block.$(OBJEXT) gmap-chrom.$(OBJEXT) \
+ gmap-access.$(OBJEXT) gmap-filestring.$(OBJEXT) \
+ gmap-iit-read-univ.$(OBJEXT) gmap-iit-read.$(OBJEXT) \
+ gmap-md5.$(OBJEXT) gmap-bzip2.$(OBJEXT) \
+ gmap-sequence.$(OBJEXT) gmap-reader.$(OBJEXT) \
+ gmap-genomicpos.$(OBJEXT) gmap-compress.$(OBJEXT) \
+ gmap-compress-write.$(OBJEXT) gmap-gbuffer.$(OBJEXT) \
+ gmap-genome.$(OBJEXT) gmap-popcount.$(OBJEXT) \
+ gmap-genome128_hr.$(OBJEXT) gmap-genome_sites.$(OBJEXT) \
+ gmap-genome-write.$(OBJEXT) gmap-bitpack64-read.$(OBJEXT) \
+ gmap-bitpack64-readtwo.$(OBJEXT) gmap-indexdb.$(OBJEXT) \
+ gmap-indexdb_hr.$(OBJEXT) gmap-oligo.$(OBJEXT) \
+ gmap-block.$(OBJEXT) gmap-chrom.$(OBJEXT) \
gmap-segmentpos.$(OBJEXT) gmap-chrnum.$(OBJEXT) \
gmap-uinttable.$(OBJEXT) gmap-gregion.$(OBJEXT) \
gmap-match.$(OBJEXT) gmap-matchpool.$(OBJEXT) \
@@ -188,11 +192,11 @@ am__objects_4 = gmap-except.$(OBJEXT) gmap-assert.$(OBJEXT) \
gmap-dynprog_end.$(OBJEXT) gmap-translation.$(OBJEXT) \
gmap-pbinom.$(OBJEXT) gmap-changepoint.$(OBJEXT) \
gmap-stage3.$(OBJEXT) gmap-request.$(OBJEXT) \
- gmap-result.$(OBJEXT) gmap-inbuffer.$(OBJEXT) \
- gmap-samheader.$(OBJEXT) gmap-outbuffer.$(OBJEXT) \
- gmap-chimera.$(OBJEXT) gmap-datadir.$(OBJEXT) \
- gmap-getopt.$(OBJEXT) gmap-getopt1.$(OBJEXT) \
- gmap-gmap.$(OBJEXT)
+ gmap-result.$(OBJEXT) gmap-output.$(OBJEXT) \
+ gmap-inbuffer.$(OBJEXT) gmap-samheader.$(OBJEXT) \
+ gmap-outbuffer.$(OBJEXT) gmap-chimera.$(OBJEXT) \
+ gmap-datadir.$(OBJEXT) gmap-getopt.$(OBJEXT) \
+ gmap-getopt1.$(OBJEXT) gmap-gmap.$(OBJEXT)
dist_gmap_OBJECTS = $(am__objects_4)
gmap_OBJECTS = $(dist_gmap_OBJECTS)
gmap_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
@@ -206,6 +210,7 @@ am__objects_5 = gmapindex-except.$(OBJEXT) gmapindex-assert.$(OBJEXT) \
gmapindex-bigendian.$(OBJEXT) gmapindex-univinterval.$(OBJEXT) \
gmapindex-interval.$(OBJEXT) gmapindex-uintlist.$(OBJEXT) \
gmapindex-stopwatch.$(OBJEXT) gmapindex-access.$(OBJEXT) \
+ gmapindex-filestring.$(OBJEXT) \
gmapindex-iit-read-univ.$(OBJEXT) \
gmapindex-iit-write-univ.$(OBJEXT) \
gmapindex-iit-read.$(OBJEXT) gmapindex-md5.$(OBJEXT) \
@@ -237,14 +242,15 @@ am__objects_6 = gmapl-except.$(OBJEXT) gmapl-assert.$(OBJEXT) \
gmapl-bigendian.$(OBJEXT) gmapl-univinterval.$(OBJEXT) \
gmapl-interval.$(OBJEXT) gmapl-uintlist.$(OBJEXT) \
gmapl-stopwatch.$(OBJEXT) gmapl-access.$(OBJEXT) \
- gmapl-iit-read-univ.$(OBJEXT) gmapl-iit-read.$(OBJEXT) \
- gmapl-md5.$(OBJEXT) gmapl-bzip2.$(OBJEXT) \
- gmapl-sequence.$(OBJEXT) gmapl-reader.$(OBJEXT) \
- gmapl-genomicpos.$(OBJEXT) gmapl-compress.$(OBJEXT) \
- gmapl-compress-write.$(OBJEXT) gmapl-gbuffer.$(OBJEXT) \
- gmapl-genome.$(OBJEXT) gmapl-popcount.$(OBJEXT) \
- gmapl-genome128_hr.$(OBJEXT) gmapl-genome_sites.$(OBJEXT) \
- gmapl-genome-write.$(OBJEXT) gmapl-bitpack64-read.$(OBJEXT) \
+ gmapl-filestring.$(OBJEXT) gmapl-iit-read-univ.$(OBJEXT) \
+ gmapl-iit-read.$(OBJEXT) gmapl-md5.$(OBJEXT) \
+ gmapl-bzip2.$(OBJEXT) gmapl-sequence.$(OBJEXT) \
+ gmapl-reader.$(OBJEXT) gmapl-genomicpos.$(OBJEXT) \
+ gmapl-compress.$(OBJEXT) gmapl-compress-write.$(OBJEXT) \
+ gmapl-gbuffer.$(OBJEXT) gmapl-genome.$(OBJEXT) \
+ gmapl-popcount.$(OBJEXT) gmapl-genome128_hr.$(OBJEXT) \
+ gmapl-genome_sites.$(OBJEXT) gmapl-genome-write.$(OBJEXT) \
+ gmapl-bitpack64-read.$(OBJEXT) \
gmapl-bitpack64-readtwo.$(OBJEXT) gmapl-indexdb.$(OBJEXT) \
gmapl-indexdb_hr.$(OBJEXT) gmapl-oligo.$(OBJEXT) \
gmapl-block.$(OBJEXT) gmapl-chrom.$(OBJEXT) \
@@ -267,11 +273,11 @@ am__objects_6 = gmapl-except.$(OBJEXT) gmapl-assert.$(OBJEXT) \
gmapl-dynprog_end.$(OBJEXT) gmapl-translation.$(OBJEXT) \
gmapl-pbinom.$(OBJEXT) gmapl-changepoint.$(OBJEXT) \
gmapl-stage3.$(OBJEXT) gmapl-request.$(OBJEXT) \
- gmapl-result.$(OBJEXT) gmapl-inbuffer.$(OBJEXT) \
- gmapl-samheader.$(OBJEXT) gmapl-outbuffer.$(OBJEXT) \
- gmapl-chimera.$(OBJEXT) gmapl-datadir.$(OBJEXT) \
- gmapl-getopt.$(OBJEXT) gmapl-getopt1.$(OBJEXT) \
- gmapl-gmap.$(OBJEXT)
+ gmapl-result.$(OBJEXT) gmapl-output.$(OBJEXT) \
+ gmapl-inbuffer.$(OBJEXT) gmapl-samheader.$(OBJEXT) \
+ gmapl-outbuffer.$(OBJEXT) gmapl-chimera.$(OBJEXT) \
+ gmapl-datadir.$(OBJEXT) gmapl-getopt.$(OBJEXT) \
+ gmapl-getopt1.$(OBJEXT) gmapl-gmap.$(OBJEXT)
dist_gmapl_OBJECTS = $(am__objects_6)
gmapl_OBJECTS = $(dist_gmapl_OBJECTS)
gmapl_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
@@ -285,20 +291,20 @@ am__objects_7 = gsnap-except.$(OBJEXT) gsnap-assert.$(OBJEXT) \
gsnap-bigendian.$(OBJEXT) gsnap-univinterval.$(OBJEXT) \
gsnap-interval.$(OBJEXT) gsnap-uintlist.$(OBJEXT) \
gsnap-stopwatch.$(OBJEXT) gsnap-access.$(OBJEXT) \
- gsnap-iit-read-univ.$(OBJEXT) gsnap-iit-read.$(OBJEXT) \
- gsnap-md5.$(OBJEXT) gsnap-bzip2.$(OBJEXT) \
- gsnap-sequence.$(OBJEXT) gsnap-reader.$(OBJEXT) \
- gsnap-genomicpos.$(OBJEXT) gsnap-compress.$(OBJEXT) \
- gsnap-genome.$(OBJEXT) gsnap-popcount.$(OBJEXT) \
- gsnap-genome128_hr.$(OBJEXT) gsnap-genome_sites.$(OBJEXT) \
- gsnap-bitpack64-read.$(OBJEXT) \
+ gsnap-filestring.$(OBJEXT) gsnap-iit-read-univ.$(OBJEXT) \
+ gsnap-iit-read.$(OBJEXT) gsnap-md5.$(OBJEXT) \
+ gsnap-bzip2.$(OBJEXT) gsnap-sequence.$(OBJEXT) \
+ gsnap-reader.$(OBJEXT) gsnap-genomicpos.$(OBJEXT) \
+ gsnap-compress.$(OBJEXT) gsnap-genome.$(OBJEXT) \
+ gsnap-popcount.$(OBJEXT) gsnap-genome128_hr.$(OBJEXT) \
+ gsnap-genome_sites.$(OBJEXT) gsnap-bitpack64-read.$(OBJEXT) \
gsnap-bitpack64-readtwo.$(OBJEXT) gsnap-indexdb.$(OBJEXT) \
gsnap-indexdb_hr.$(OBJEXT) gsnap-oligo.$(OBJEXT) \
gsnap-chrom.$(OBJEXT) gsnap-segmentpos.$(OBJEXT) \
gsnap-chrnum.$(OBJEXT) gsnap-maxent_hr.$(OBJEXT) \
gsnap-samprint.$(OBJEXT) gsnap-mapq.$(OBJEXT) \
gsnap-shortread.$(OBJEXT) gsnap-substring.$(OBJEXT) \
- gsnap-stage3hr.$(OBJEXT) gsnap-goby.$(OBJEXT) \
+ gsnap-junction.$(OBJEXT) gsnap-stage3hr.$(OBJEXT) \
gsnap-spanningelt.$(OBJEXT) gsnap-cmet.$(OBJEXT) \
gsnap-atoi.$(OBJEXT) gsnap-maxent.$(OBJEXT) \
gsnap-pair.$(OBJEXT) gsnap-pairpool.$(OBJEXT) \
@@ -317,8 +323,9 @@ am__objects_7 = gsnap-except.$(OBJEXT) gsnap-assert.$(OBJEXT) \
gsnap-splicetrie_build.$(OBJEXT) gsnap-splicetrie.$(OBJEXT) \
gsnap-splice.$(OBJEXT) gsnap-indel.$(OBJEXT) \
gsnap-bitpack64-access.$(OBJEXT) gsnap-bytecoding.$(OBJEXT) \
- gsnap-sarray-read.$(OBJEXT) gsnap-stage1hr.$(OBJEXT) \
- gsnap-request.$(OBJEXT) gsnap-resulthr.$(OBJEXT) \
+ gsnap-univdiag.$(OBJEXT) gsnap-sarray-read.$(OBJEXT) \
+ gsnap-stage1hr.$(OBJEXT) gsnap-request.$(OBJEXT) \
+ gsnap-resulthr.$(OBJEXT) gsnap-output.$(OBJEXT) \
gsnap-inbuffer.$(OBJEXT) gsnap-samheader.$(OBJEXT) \
gsnap-outbuffer.$(OBJEXT) gsnap-datadir.$(OBJEXT) \
gsnap-getopt.$(OBJEXT) gsnap-getopt1.$(OBJEXT) \
@@ -326,7 +333,7 @@ am__objects_7 = gsnap-except.$(OBJEXT) gsnap-assert.$(OBJEXT) \
dist_gsnap_OBJECTS = $(am__objects_7)
gsnap_OBJECTS = $(dist_gsnap_OBJECTS)
gsnap_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+ $(am__DEPENDENCIES_1)
gsnap_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(gsnap_CFLAGS) $(CFLAGS) $(gsnap_LDFLAGS) \
$(LDFLAGS) -o $@
@@ -336,20 +343,21 @@ am__objects_8 = gsnapl-except.$(OBJEXT) gsnapl-assert.$(OBJEXT) \
gsnapl-bigendian.$(OBJEXT) gsnapl-univinterval.$(OBJEXT) \
gsnapl-interval.$(OBJEXT) gsnapl-uintlist.$(OBJEXT) \
gsnapl-uint8list.$(OBJEXT) gsnapl-stopwatch.$(OBJEXT) \
- gsnapl-access.$(OBJEXT) gsnapl-iit-read-univ.$(OBJEXT) \
- gsnapl-iit-read.$(OBJEXT) gsnapl-md5.$(OBJEXT) \
- gsnapl-bzip2.$(OBJEXT) gsnapl-sequence.$(OBJEXT) \
- gsnapl-reader.$(OBJEXT) gsnapl-genomicpos.$(OBJEXT) \
- gsnapl-compress.$(OBJEXT) gsnapl-genome.$(OBJEXT) \
- gsnapl-popcount.$(OBJEXT) gsnapl-genome128_hr.$(OBJEXT) \
- gsnapl-genome_sites.$(OBJEXT) gsnapl-bitpack64-read.$(OBJEXT) \
+ gsnapl-access.$(OBJEXT) gsnapl-filestring.$(OBJEXT) \
+ gsnapl-iit-read-univ.$(OBJEXT) gsnapl-iit-read.$(OBJEXT) \
+ gsnapl-md5.$(OBJEXT) gsnapl-bzip2.$(OBJEXT) \
+ gsnapl-sequence.$(OBJEXT) gsnapl-reader.$(OBJEXT) \
+ gsnapl-genomicpos.$(OBJEXT) gsnapl-compress.$(OBJEXT) \
+ gsnapl-genome.$(OBJEXT) gsnapl-popcount.$(OBJEXT) \
+ gsnapl-genome128_hr.$(OBJEXT) gsnapl-genome_sites.$(OBJEXT) \
+ gsnapl-bitpack64-read.$(OBJEXT) \
gsnapl-bitpack64-readtwo.$(OBJEXT) gsnapl-indexdb.$(OBJEXT) \
gsnapl-indexdb_hr.$(OBJEXT) gsnapl-oligo.$(OBJEXT) \
gsnapl-chrom.$(OBJEXT) gsnapl-segmentpos.$(OBJEXT) \
gsnapl-chrnum.$(OBJEXT) gsnapl-maxent_hr.$(OBJEXT) \
gsnapl-samprint.$(OBJEXT) gsnapl-mapq.$(OBJEXT) \
gsnapl-shortread.$(OBJEXT) gsnapl-substring.$(OBJEXT) \
- gsnapl-stage3hr.$(OBJEXT) gsnapl-goby.$(OBJEXT) \
+ gsnapl-junction.$(OBJEXT) gsnapl-stage3hr.$(OBJEXT) \
gsnapl-spanningelt.$(OBJEXT) gsnapl-cmet.$(OBJEXT) \
gsnapl-atoi.$(OBJEXT) gsnapl-maxent.$(OBJEXT) \
gsnapl-pair.$(OBJEXT) gsnapl-pairpool.$(OBJEXT) \
@@ -369,14 +377,14 @@ am__objects_8 = gsnapl-except.$(OBJEXT) gsnapl-assert.$(OBJEXT) \
gsnapl-splice.$(OBJEXT) gsnapl-indel.$(OBJEXT) \
gsnapl-bitpack64-access.$(OBJEXT) gsnapl-stage1hr.$(OBJEXT) \
gsnapl-request.$(OBJEXT) gsnapl-resulthr.$(OBJEXT) \
- gsnapl-inbuffer.$(OBJEXT) gsnapl-samheader.$(OBJEXT) \
- gsnapl-outbuffer.$(OBJEXT) gsnapl-datadir.$(OBJEXT) \
- gsnapl-getopt.$(OBJEXT) gsnapl-getopt1.$(OBJEXT) \
- gsnapl-gsnap.$(OBJEXT)
+ gsnapl-output.$(OBJEXT) gsnapl-inbuffer.$(OBJEXT) \
+ gsnapl-samheader.$(OBJEXT) gsnapl-outbuffer.$(OBJEXT) \
+ gsnapl-datadir.$(OBJEXT) gsnapl-getopt.$(OBJEXT) \
+ gsnapl-getopt1.$(OBJEXT) gsnapl-gsnap.$(OBJEXT)
dist_gsnapl_OBJECTS = $(am__objects_8)
gsnapl_OBJECTS = $(dist_gsnapl_OBJECTS)
gsnapl_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+ $(am__DEPENDENCIES_1)
gsnapl_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(gsnapl_CFLAGS) $(CFLAGS) \
$(gsnapl_LDFLAGS) $(LDFLAGS) -o $@
@@ -386,9 +394,9 @@ am__objects_9 = iit_dump-except.$(OBJEXT) iit_dump-assert.$(OBJEXT) \
iit_dump-list.$(OBJEXT) iit_dump-univinterval.$(OBJEXT) \
iit_dump-interval.$(OBJEXT) iit_dump-uintlist.$(OBJEXT) \
iit_dump-stopwatch.$(OBJEXT) iit_dump-access.$(OBJEXT) \
- iit_dump-iit-read-univ.$(OBJEXT) iit_dump-iit-read.$(OBJEXT) \
- iit_dump-getopt.$(OBJEXT) iit_dump-getopt1.$(OBJEXT) \
- iit_dump-iit_dump.$(OBJEXT)
+ iit_dump-filestring.$(OBJEXT) iit_dump-iit-read-univ.$(OBJEXT) \
+ iit_dump-iit-read.$(OBJEXT) iit_dump-getopt.$(OBJEXT) \
+ iit_dump-getopt1.$(OBJEXT) iit_dump-iit_dump.$(OBJEXT)
dist_iit_dump_OBJECTS = $(am__objects_9)
iit_dump_OBJECTS = $(dist_iit_dump_OBJECTS)
iit_dump_DEPENDENCIES = $(am__DEPENDENCIES_1)
@@ -401,9 +409,10 @@ am__objects_10 = iit_get-except.$(OBJEXT) iit_get-assert.$(OBJEXT) \
iit_get-bigendian.$(OBJEXT) iit_get-univinterval.$(OBJEXT) \
iit_get-interval.$(OBJEXT) iit_get-uintlist.$(OBJEXT) \
iit_get-stopwatch.$(OBJEXT) iit_get-access.$(OBJEXT) \
- iit_get-iit-read-univ.$(OBJEXT) iit_get-iit-read.$(OBJEXT) \
- iit_get-parserange.$(OBJEXT) iit_get-getopt.$(OBJEXT) \
- iit_get-getopt1.$(OBJEXT) iit_get-iit_get.$(OBJEXT)
+ iit_get-filestring.$(OBJEXT) iit_get-iit-read-univ.$(OBJEXT) \
+ iit_get-iit-read.$(OBJEXT) iit_get-parserange.$(OBJEXT) \
+ iit_get-getopt.$(OBJEXT) iit_get-getopt1.$(OBJEXT) \
+ iit_get-iit_get.$(OBJEXT)
dist_iit_get_OBJECTS = $(am__objects_10)
iit_get_OBJECTS = $(dist_iit_get_OBJECTS)
iit_get_DEPENDENCIES = $(am__DEPENDENCIES_1)
@@ -434,10 +443,11 @@ am__objects_12 = sam_sort-except.$(OBJEXT) sam_sort-assert.$(OBJEXT) \
sam_sort-list.$(OBJEXT) sam_sort-stopwatch.$(OBJEXT) \
sam_sort-access.$(OBJEXT) sam_sort-univinterval.$(OBJEXT) \
sam_sort-interval.$(OBJEXT) sam_sort-uintlist.$(OBJEXT) \
- sam_sort-iit-read-univ.$(OBJEXT) sam_sort-iit-read.$(OBJEXT) \
- sam_sort-samheader.$(OBJEXT) sam_sort-samread.$(OBJEXT) \
- sam_sort-datadir.$(OBJEXT) sam_sort-getopt.$(OBJEXT) \
- sam_sort-getopt1.$(OBJEXT) sam_sort-sam_sort.$(OBJEXT)
+ sam_sort-filestring.$(OBJEXT) sam_sort-iit-read-univ.$(OBJEXT) \
+ sam_sort-iit-read.$(OBJEXT) sam_sort-samheader.$(OBJEXT) \
+ sam_sort-samread.$(OBJEXT) sam_sort-datadir.$(OBJEXT) \
+ sam_sort-getopt.$(OBJEXT) sam_sort-getopt1.$(OBJEXT) \
+ sam_sort-sam_sort.$(OBJEXT)
dist_sam_sort_OBJECTS = $(am__objects_12)
sam_sort_OBJECTS = $(dist_sam_sort_OBJECTS)
sam_sort_DEPENDENCIES = $(am__DEPENDENCIES_1)
@@ -450,10 +460,10 @@ am__objects_13 = snpindex-except.$(OBJEXT) snpindex-assert.$(OBJEXT) \
snpindex-bigendian.$(OBJEXT) snpindex-univinterval.$(OBJEXT) \
snpindex-interval.$(OBJEXT) snpindex-uintlist.$(OBJEXT) \
snpindex-stopwatch.$(OBJEXT) snpindex-access.$(OBJEXT) \
- snpindex-iit-read-univ.$(OBJEXT) snpindex-iit-read.$(OBJEXT) \
- snpindex-genomicpos.$(OBJEXT) snpindex-compress.$(OBJEXT) \
- snpindex-compress-write.$(OBJEXT) snpindex-popcount.$(OBJEXT) \
- snpindex-genome128_hr.$(OBJEXT) \
+ snpindex-filestring.$(OBJEXT) snpindex-iit-read-univ.$(OBJEXT) \
+ snpindex-iit-read.$(OBJEXT) snpindex-genomicpos.$(OBJEXT) \
+ snpindex-compress.$(OBJEXT) snpindex-compress-write.$(OBJEXT) \
+ snpindex-popcount.$(OBJEXT) snpindex-genome128_hr.$(OBJEXT) \
snpindex-bitpack64-read.$(OBJEXT) \
snpindex-bitpack64-readtwo.$(OBJEXT) \
snpindex-bitpack64-write.$(OBJEXT) snpindex-indexdb.$(OBJEXT) \
@@ -475,12 +485,12 @@ am__objects_14 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \
uniqscan-bigendian.$(OBJEXT) uniqscan-univinterval.$(OBJEXT) \
uniqscan-interval.$(OBJEXT) uniqscan-uintlist.$(OBJEXT) \
uniqscan-stopwatch.$(OBJEXT) uniqscan-access.$(OBJEXT) \
- uniqscan-iit-read-univ.$(OBJEXT) uniqscan-iit-read.$(OBJEXT) \
- uniqscan-md5.$(OBJEXT) uniqscan-bzip2.$(OBJEXT) \
- uniqscan-sequence.$(OBJEXT) uniqscan-reader.$(OBJEXT) \
- uniqscan-genomicpos.$(OBJEXT) uniqscan-compress.$(OBJEXT) \
- uniqscan-genome.$(OBJEXT) uniqscan-popcount.$(OBJEXT) \
- uniqscan-genome128_hr.$(OBJEXT) \
+ uniqscan-filestring.$(OBJEXT) uniqscan-iit-read-univ.$(OBJEXT) \
+ uniqscan-iit-read.$(OBJEXT) uniqscan-md5.$(OBJEXT) \
+ uniqscan-bzip2.$(OBJEXT) uniqscan-sequence.$(OBJEXT) \
+ uniqscan-reader.$(OBJEXT) uniqscan-genomicpos.$(OBJEXT) \
+ uniqscan-compress.$(OBJEXT) uniqscan-genome.$(OBJEXT) \
+ uniqscan-popcount.$(OBJEXT) uniqscan-genome128_hr.$(OBJEXT) \
uniqscan-genome_sites.$(OBJEXT) \
uniqscan-bitpack64-read.$(OBJEXT) \
uniqscan-bitpack64-readtwo.$(OBJEXT) \
@@ -489,16 +499,16 @@ am__objects_14 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \
uniqscan-segmentpos.$(OBJEXT) uniqscan-chrnum.$(OBJEXT) \
uniqscan-maxent_hr.$(OBJEXT) uniqscan-mapq.$(OBJEXT) \
uniqscan-shortread.$(OBJEXT) uniqscan-substring.$(OBJEXT) \
- uniqscan-stage3hr.$(OBJEXT) uniqscan-spanningelt.$(OBJEXT) \
- uniqscan-cmet.$(OBJEXT) uniqscan-atoi.$(OBJEXT) \
- uniqscan-maxent.$(OBJEXT) uniqscan-pair.$(OBJEXT) \
- uniqscan-pairpool.$(OBJEXT) uniqscan-diag.$(OBJEXT) \
- uniqscan-diagpool.$(OBJEXT) uniqscan-orderstat.$(OBJEXT) \
- uniqscan-oligoindex_hr.$(OBJEXT) uniqscan-cellpool.$(OBJEXT) \
- uniqscan-stage2.$(OBJEXT) uniqscan-intron.$(OBJEXT) \
- uniqscan-boyer-moore.$(OBJEXT) uniqscan-changepoint.$(OBJEXT) \
- uniqscan-pbinom.$(OBJEXT) uniqscan-dynprog.$(OBJEXT) \
- uniqscan-dynprog_simd.$(OBJEXT) \
+ uniqscan-junction.$(OBJEXT) uniqscan-stage3hr.$(OBJEXT) \
+ uniqscan-spanningelt.$(OBJEXT) uniqscan-cmet.$(OBJEXT) \
+ uniqscan-atoi.$(OBJEXT) uniqscan-maxent.$(OBJEXT) \
+ uniqscan-pair.$(OBJEXT) uniqscan-pairpool.$(OBJEXT) \
+ uniqscan-diag.$(OBJEXT) uniqscan-diagpool.$(OBJEXT) \
+ uniqscan-orderstat.$(OBJEXT) uniqscan-oligoindex_hr.$(OBJEXT) \
+ uniqscan-cellpool.$(OBJEXT) uniqscan-stage2.$(OBJEXT) \
+ uniqscan-intron.$(OBJEXT) uniqscan-boyer-moore.$(OBJEXT) \
+ uniqscan-changepoint.$(OBJEXT) uniqscan-pbinom.$(OBJEXT) \
+ uniqscan-dynprog.$(OBJEXT) uniqscan-dynprog_simd.$(OBJEXT) \
uniqscan-dynprog_single.$(OBJEXT) \
uniqscan-dynprog_genome.$(OBJEXT) \
uniqscan-dynprog_cdna.$(OBJEXT) uniqscan-dynprog_end.$(OBJEXT) \
@@ -508,10 +518,11 @@ am__objects_14 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \
uniqscan-splicetrie_build.$(OBJEXT) \
uniqscan-splicetrie.$(OBJEXT) uniqscan-splice.$(OBJEXT) \
uniqscan-indel.$(OBJEXT) uniqscan-bitpack64-access.$(OBJEXT) \
- uniqscan-bytecoding.$(OBJEXT) uniqscan-sarray-read.$(OBJEXT) \
- uniqscan-stage1hr.$(OBJEXT) uniqscan-resulthr.$(OBJEXT) \
- uniqscan-datadir.$(OBJEXT) uniqscan-getopt.$(OBJEXT) \
- uniqscan-getopt1.$(OBJEXT) uniqscan-uniqscan.$(OBJEXT)
+ uniqscan-bytecoding.$(OBJEXT) uniqscan-univdiag.$(OBJEXT) \
+ uniqscan-sarray-read.$(OBJEXT) uniqscan-stage1hr.$(OBJEXT) \
+ uniqscan-resulthr.$(OBJEXT) uniqscan-datadir.$(OBJEXT) \
+ uniqscan-getopt.$(OBJEXT) uniqscan-getopt1.$(OBJEXT) \
+ uniqscan-uniqscan.$(OBJEXT)
dist_uniqscan_OBJECTS = $(am__objects_14)
uniqscan_OBJECTS = $(dist_uniqscan_OBJECTS)
uniqscan_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
@@ -525,12 +536,13 @@ am__objects_15 = uniqscanl-except.$(OBJEXT) uniqscanl-assert.$(OBJEXT) \
uniqscanl-bigendian.$(OBJEXT) uniqscanl-univinterval.$(OBJEXT) \
uniqscanl-interval.$(OBJEXT) uniqscanl-uintlist.$(OBJEXT) \
uniqscanl-uint8list.$(OBJEXT) uniqscanl-stopwatch.$(OBJEXT) \
- uniqscanl-access.$(OBJEXT) uniqscanl-iit-read-univ.$(OBJEXT) \
- uniqscanl-iit-read.$(OBJEXT) uniqscanl-md5.$(OBJEXT) \
- uniqscanl-bzip2.$(OBJEXT) uniqscanl-sequence.$(OBJEXT) \
- uniqscanl-reader.$(OBJEXT) uniqscanl-genomicpos.$(OBJEXT) \
- uniqscanl-compress.$(OBJEXT) uniqscanl-genome.$(OBJEXT) \
- uniqscanl-popcount.$(OBJEXT) uniqscanl-genome128_hr.$(OBJEXT) \
+ uniqscanl-access.$(OBJEXT) uniqscanl-filestring.$(OBJEXT) \
+ uniqscanl-iit-read-univ.$(OBJEXT) uniqscanl-iit-read.$(OBJEXT) \
+ uniqscanl-md5.$(OBJEXT) uniqscanl-bzip2.$(OBJEXT) \
+ uniqscanl-sequence.$(OBJEXT) uniqscanl-reader.$(OBJEXT) \
+ uniqscanl-genomicpos.$(OBJEXT) uniqscanl-compress.$(OBJEXT) \
+ uniqscanl-genome.$(OBJEXT) uniqscanl-popcount.$(OBJEXT) \
+ uniqscanl-genome128_hr.$(OBJEXT) \
uniqscanl-genome_sites.$(OBJEXT) \
uniqscanl-bitpack64-read.$(OBJEXT) \
uniqscanl-bitpack64-readtwo.$(OBJEXT) \
@@ -539,11 +551,12 @@ am__objects_15 = uniqscanl-except.$(OBJEXT) uniqscanl-assert.$(OBJEXT) \
uniqscanl-segmentpos.$(OBJEXT) uniqscanl-chrnum.$(OBJEXT) \
uniqscanl-maxent_hr.$(OBJEXT) uniqscanl-mapq.$(OBJEXT) \
uniqscanl-shortread.$(OBJEXT) uniqscanl-substring.$(OBJEXT) \
- uniqscanl-stage3hr.$(OBJEXT) uniqscanl-spanningelt.$(OBJEXT) \
- uniqscanl-cmet.$(OBJEXT) uniqscanl-atoi.$(OBJEXT) \
- uniqscanl-maxent.$(OBJEXT) uniqscanl-pair.$(OBJEXT) \
- uniqscanl-pairpool.$(OBJEXT) uniqscanl-diag.$(OBJEXT) \
- uniqscanl-diagpool.$(OBJEXT) uniqscanl-orderstat.$(OBJEXT) \
+ uniqscanl-junction.$(OBJEXT) uniqscanl-stage3hr.$(OBJEXT) \
+ uniqscanl-spanningelt.$(OBJEXT) uniqscanl-cmet.$(OBJEXT) \
+ uniqscanl-atoi.$(OBJEXT) uniqscanl-maxent.$(OBJEXT) \
+ uniqscanl-pair.$(OBJEXT) uniqscanl-pairpool.$(OBJEXT) \
+ uniqscanl-diag.$(OBJEXT) uniqscanl-diagpool.$(OBJEXT) \
+ uniqscanl-orderstat.$(OBJEXT) \
uniqscanl-oligoindex_hr.$(OBJEXT) uniqscanl-cellpool.$(OBJEXT) \
uniqscanl-stage2.$(OBJEXT) uniqscanl-intron.$(OBJEXT) \
uniqscanl-boyer-moore.$(OBJEXT) \
@@ -629,9 +642,6 @@ EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GMAPDB = @GMAPDB@
-GOBY_CFLAGS = @GOBY_CFLAGS@
-GOBY_LDFLAGS = @GOBY_LDFLAGS@
-GOBY_LIBS = @GOBY_LIBS@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
@@ -651,6 +661,7 @@ MAX_READLENGTH = @MAX_READLENGTH@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
+MPI_CFLAGS = @MPI_CFLAGS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
@@ -737,12 +748,16 @@ top_srcdir = @top_srcdir@
# Copied from ../util so "gmap_setup -B src" works during "make check"
CLEANFILES = fa_coords gmap_process
+
+# Include master.c and master.h, so they get included in distribution
+EXTRA_DIST = mpidebug.c mpidebug.h master.c master.h
GMAP_FILES = fopen.h bool.h types.h separator.h comp.h \
except.c except.h assert.c assert.h mem.c mem.h \
intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h compress-write.c compress-write.h \
@@ -768,7 +783,7 @@ GMAP_FILES = fopen.h bool.h types.h separator.h comp.h \
dynprog_single.c dynprog_single.h dynprog_genome.c dynprog_genome.h dynprog_cdna.c dynprog_cdna.h dynprog_end.c dynprog_end.h \
translation.c translation.h \
pbinom.c pbinom.h changepoint.c changepoint.h sense.h fastlog.h stage3.c stage3.h \
- request.c request.h result.c result.h \
+ request.c request.h result.c result.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
chimera.c chimera.h datadir.c datadir.h \
getopt.c getopt1.c getopt.h gmap.c
@@ -786,6 +801,7 @@ GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h compress-write.c compress-write.h \
@@ -811,7 +827,7 @@ GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
dynprog_single.c dynprog_single.h dynprog_genome.c dynprog_genome.h dynprog_cdna.c dynprog_cdna.h dynprog_end.c dynprog_end.h \
translation.c translation.h \
pbinom.c pbinom.h changepoint.c changepoint.h sense.h fastlog.h stage3.c stage3.h \
- request.c request.h result.c result.h \
+ request.c request.h result.c result.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
chimera.c chimera.h datadir.c datadir.h \
getopt.c getopt1.c getopt.h gmap.c
@@ -830,6 +846,7 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
univinterval.c univinterval.h interval.c interval.h \
uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h \
@@ -841,8 +858,7 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
chrom.c chrom.h segmentpos.c segmentpos.h \
chrnum.c chrnum.h \
maxent_hr.c maxent_hr.h samflags.h samprint.c samprint.h \
- mapq.c mapq.h shortread.c shortread.h substring.c substring.h stage3hr.c stage3hr.h \
- goby.c goby.h \
+ mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
@@ -854,19 +870,20 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
- bytecoding.c bytecoding.h sarray-read.c sarray-read.h \
+ bytecoding.c bytecoding.h univdiagdef.h univdiag.c univdiag.h sarray-read.c sarray-read.h \
stage1hr.c stage1hr.h \
- request.c request.h resulthr.c resulthr.h \
+ request.c request.h resulthr.c resulthr.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
datadir.c datadir.h mode.h \
getopt.c getopt1.c getopt.h gsnap.c
# Note: dist_ commands get read by bootstrap, and don't follow the flags
+# -lrt is needed for shm_open
gsnap_CC = $(PTHREAD_CC)
-gsnap_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) $(GOBY_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1
-gsnap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) $(GOBY_LDFLAGS)
-gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) $(GOBY_LIBS)
+gsnap_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1
+gsnap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) -lrt
dist_gsnap_SOURCES = $(GSNAP_FILES)
GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
except.c except.h assert.c assert.h mem.c mem.h \
@@ -875,6 +892,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
univinterval.c univinterval.h interval.c interval.h \
uintlist.c uintlist.h uint8list.c uint8list.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h \
@@ -886,8 +904,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
chrom.c chrom.h segmentpos.c segmentpos.h \
chrnum.c chrnum.h \
maxent_hr.c maxent_hr.h samflags.h samprint.c samprint.h \
- mapq.c mapq.h shortread.c shortread.h substring.c substring.h stage3hr.c stage3hr.h \
- goby.c goby.h \
+ mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
@@ -900,7 +917,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
stage1hr.c stage1hr.h \
- request.c request.h resulthr.c resulthr.h \
+ request.c request.h resulthr.c resulthr.h output.c output.h \
inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
datadir.c datadir.h mode.h \
getopt.c getopt1.c getopt.h gsnap.c
@@ -908,9 +925,9 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
# Note: dist_ commands get read by bootstrap, and don't follow the flags
gsnapl_CC = $(PTHREAD_CC)
-gsnapl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) $(GOBY_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1
-gsnapl_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) $(GOBY_LDFLAGS)
-gsnapl_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) $(GOBY_LIBS)
+gsnapl_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1
+gsnapl_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gsnapl_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnapl_SOURCES = $(GSNAPL_FILES)
UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
except.c except.h assert.c assert.h mem.c mem.h \
@@ -918,6 +935,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h \
@@ -929,7 +947,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
chrom.c chrom.h segmentpos.c segmentpos.h \
chrnum.c chrnum.h \
maxent_hr.c maxent_hr.h \
- mapq.c mapq.h shortread.c shortread.h substring.c substring.h stage3hr.c stage3hr.h \
+ mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
@@ -941,7 +959,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
- bytecoding.c bytecoding.h sarray-read.c sarray-read.h \
+ bytecoding.c bytecoding.h univdiagdef.h univdiag.c univdiag.h sarray-read.c sarray-read.h \
stage1hr.c stage1hr.h resulthr.c resulthr.h \
datadir.c datadir.h mode.h \
getopt.c getopt1.c getopt.h uniqscan.c
@@ -957,6 +975,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h uint8list.c uint8list.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h reader.c reader.h \
genomicpos.c genomicpos.h compress.c compress.h \
@@ -968,7 +987,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
chrom.c chrom.h segmentpos.c segmentpos.h \
chrnum.c chrnum.h \
maxent_hr.c maxent_hr.h \
- mapq.c mapq.h shortread.c shortread.h substring.c substring.h stage3hr.c stage3hr.h \
+ mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
@@ -995,6 +1014,7 @@ GMAPINDEX_FILES = fopen.h bool.h types.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iit-write-univ.c iit-write-univ.h \
iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h genome.c genome.h \
@@ -1019,6 +1039,7 @@ SNPINDEX_FILES = fopen.h bool.h types.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
genomicpos.c genomicpos.h compress.c compress.h compress-write.c compress-write.h \
popcount.c popcount.h genome128_hr.c genome128_hr.h \
@@ -1040,6 +1061,7 @@ CMETINDEX_FILES = fopen.h bool.h types.h \
genomicpos.c genomicpos.h \
stopwatch.c stopwatch.h access.c access.h \
univinterval.c univinterval.h interval.h interval.c \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.h iit-read.c \
complement.h compress.c compress.h compress-write.c compress-write.h \
popcount.c popcount.h genome128_hr.c genome128_hr.h \
@@ -1063,6 +1085,7 @@ ATOIINDEX_FILES = fopen.h bool.h types.h \
genomicpos.c genomicpos.h \
stopwatch.c stopwatch.h access.c access.h \
univinterval.c univinterval.h interval.h interval.c \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.h iit-read.c \
complement.h compress.c compress.h compress-write.c compress-write.h \
popcount.c popcount.h genome128_hr.c genome128_hr.h \
@@ -1086,6 +1109,7 @@ GET_GENOME_FILES = fopen.h bool.h types.h separator.h \
littleendian.c littleendian.h bigendian.c bigendian.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
genome.c genome.h \
@@ -1123,6 +1147,7 @@ IIT_GET_FILES = fopen.h bool.h types.h \
univinterval.c univinterval.h interval.c interval.h \
uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
complement.h parserange.c parserange.h getopt.c getopt1.c getopt.h iit_get.c
@@ -1137,6 +1162,7 @@ IIT_DUMP_FILES = fopen.h bool.h types.h \
intlistdef.h intlist.c intlist.h list.c list.h \
univinterval.c univinterval.h interval.c interval.h uintlist.c uintlist.h \
stopwatch.c stopwatch.h access.c access.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
getopt.c getopt1.c getopt.h iit_dump.c
@@ -1153,6 +1179,7 @@ SAM_SORT_FILES = bool.h types.h \
stopwatch.c stopwatch.h access.c access.h \
univinterval.c univinterval.h interval.c interval.h \
uintlist.c uintlist.h \
+ filestring.c filestring.h \
iit-read-univ.c iit-read-univ.h iitdef.h iit-read.c iit-read.h \
samflags.h samheader.c samheader.h samread.c samread.h \
datadir.c datadir.h \
@@ -1324,6 +1351,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/atoiindex-compress.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/atoiindex-datadir.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/atoiindex-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/atoiindex-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/atoiindex-genome.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/atoiindex-genome128_hr.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/atoiindex-genomicpos.Po at am__quote@
@@ -1361,6 +1389,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/cmetindex-compress.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/cmetindex-datadir.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/cmetindex-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/cmetindex-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/cmetindex-genome.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/cmetindex-genome128_hr.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/cmetindex-genomicpos.Po at am__quote@
@@ -1392,6 +1421,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/get_genome-chrom.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/get_genome-datadir.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/get_genome-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/get_genome-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/get_genome-genome.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/get_genome-genomicpos.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/get_genome-get-genome.Po at am__quote@
@@ -1439,6 +1469,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-dynprog_simd.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-dynprog_single.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-gbuffer.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-genome-write.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-genome.Po at am__quote@
@@ -1469,6 +1500,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-oligoindex_hr.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-orderstat.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-outbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-output.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-pair.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-pairpool.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap-pbinom.Po at am__quote@
@@ -1504,6 +1536,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapindex-compress-write.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapindex-compress.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapindex-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapindex-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapindex-genome-write.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapindex-genome.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapindex-genome128_hr.Po at am__quote@
@@ -1561,6 +1594,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-dynprog_simd.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-dynprog_single.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-gbuffer.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-genome-write.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-genome.Po at am__quote@
@@ -1591,6 +1625,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-oligoindex_hr.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-orderstat.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-outbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-output.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-pair.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-pairpool.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl-pbinom.Po at am__quote@
@@ -1641,6 +1676,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-dynprog_simd.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-dynprog_single.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-gbuffer.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-genome.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-genome128_hr.Po at am__quote@
@@ -1648,7 +1684,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-genomicpos.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-getopt.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-getopt1.Po at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-goby.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-gsnap.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-iit-read-univ.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-iit-read.Po at am__quote@
@@ -1659,6 +1694,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-interval.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-intlist.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-intron.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-junction.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-list.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-littleendian.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-mapq.Po at am__quote@
@@ -1670,6 +1706,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-oligoindex_hr.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-orderstat.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-outbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-output.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-pair.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-pairpool.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-pbinom.Po at am__quote@
@@ -1697,6 +1734,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-substring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-translation.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-uintlist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-univdiag.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap-univinterval.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-access.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-assert.Po at am__quote@
@@ -1725,6 +1763,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-dynprog_simd.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-dynprog_single.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-gbuffer.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-genome.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-genome128_hr.Po at am__quote@
@@ -1732,7 +1771,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-genomicpos.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-getopt.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-getopt1.Po at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-goby.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-gsnap.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-iit-read-univ.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-iit-read.Po at am__quote@
@@ -1743,6 +1781,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-interval.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-intlist.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-intron.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-junction.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-list.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-littleendian.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-mapq.Po at am__quote@
@@ -1754,6 +1793,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-oligoindex_hr.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-orderstat.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-outbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-output.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-pair.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-pairpool.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl-pbinom.Po at am__quote@
@@ -1786,6 +1826,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_dump-assert.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_dump-bigendian.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_dump-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_dump-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_dump-getopt.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_dump-getopt1.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_dump-iit-read-univ.Po at am__quote@
@@ -1803,6 +1844,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_get-assert.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_get-bigendian.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_get-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_get-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_get-getopt.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_get-getopt1.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_get-iit-read-univ.Po at am__quote@
@@ -1843,6 +1885,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-bigendian.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-datadir.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-getopt.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-getopt1.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-iit-read-univ.Po at am__quote@
@@ -1870,6 +1913,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/snpindex-compress.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/snpindex-datadir.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/snpindex-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/snpindex-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/snpindex-genome.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/snpindex-genome128_hr.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/snpindex-genomicpos.Po at am__quote@
@@ -1919,6 +1963,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-dynprog_simd.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-dynprog_single.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-genome.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-genome128_hr.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-genome_sites.Po at am__quote@
@@ -1933,6 +1978,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-interval.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-intlist.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-intron.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-junction.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-list.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-littleendian.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-mapq.Po at am__quote@
@@ -1968,6 +2014,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-translation.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-uintlist.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-uniqscan.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-univdiag.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-univinterval.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-access.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-assert.Po at am__quote@
@@ -1996,6 +2043,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-dynprog_simd.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-dynprog_single.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-filestring.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-genome.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-genome128_hr.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-genome_sites.Po at am__quote@
@@ -2010,6 +2058,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-interval.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-intlist.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-intron.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-junction.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-list.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-littleendian.Po at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-mapq.Po at am__quote@
@@ -2208,6 +2257,20 @@ atoiindex-interval.obj: interval.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(atoiindex_CFLAGS) $(CFLAGS) -c -o atoiindex-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+atoiindex-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(atoiindex_CFLAGS) $(CFLAGS) -MT atoiindex-filestring.o -MD -MP -MF $(DEPDIR)/atoiindex-filestring.Tpo -c -o atoiindex-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/atoiindex-filestring.Tpo $(DEPDIR)/atoiindex-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='atoiindex-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(atoiindex_CFLAGS) $(CFLAGS) -c -o atoiindex-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+atoiindex-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(atoiindex_CFLAGS) $(CFLAGS) -MT atoiindex-filestring.obj -MD -MP -MF $(DEPDIR)/atoiindex-filestring.Tpo -c -o atoiindex-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/atoiindex-filestring.Tpo $(DEPDIR)/atoiindex-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='atoiindex-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(atoiindex_CFLAGS) $(CFLAGS) -c -o atoiindex-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
atoiindex-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(atoiindex_CFLAGS) $(CFLAGS) -MT atoiindex-iit-read-univ.o -MD -MP -MF $(DEPDIR)/atoiindex-iit-read-univ.Tpo -c -o atoiindex-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/atoiindex-iit-read-univ.Tpo $(DEPDIR)/atoiindex-iit-read-univ.Po
@@ -2726,6 +2789,20 @@ cmetindex-interval.obj: interval.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cmetindex_CFLAGS) $(CFLAGS) -c -o cmetindex-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+cmetindex-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cmetindex_CFLAGS) $(CFLAGS) -MT cmetindex-filestring.o -MD -MP -MF $(DEPDIR)/cmetindex-filestring.Tpo -c -o cmetindex-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/cmetindex-filestring.Tpo $(DEPDIR)/cmetindex-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='cmetindex-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cmetindex_CFLAGS) $(CFLAGS) -c -o cmetindex-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+cmetindex-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cmetindex_CFLAGS) $(CFLAGS) -MT cmetindex-filestring.obj -MD -MP -MF $(DEPDIR)/cmetindex-filestring.Tpo -c -o cmetindex-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/cmetindex-filestring.Tpo $(DEPDIR)/cmetindex-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='cmetindex-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cmetindex_CFLAGS) $(CFLAGS) -c -o cmetindex-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
cmetindex-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cmetindex_CFLAGS) $(CFLAGS) -MT cmetindex-iit-read-univ.o -MD -MP -MF $(DEPDIR)/cmetindex-iit-read-univ.Tpo -c -o cmetindex-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/cmetindex-iit-read-univ.Tpo $(DEPDIR)/cmetindex-iit-read-univ.Po
@@ -3272,6 +3349,20 @@ get_genome-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(get_genome_CFLAGS) $(CFLAGS) -c -o get_genome-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+get_genome-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(get_genome_CFLAGS) $(CFLAGS) -MT get_genome-filestring.o -MD -MP -MF $(DEPDIR)/get_genome-filestring.Tpo -c -o get_genome-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/get_genome-filestring.Tpo $(DEPDIR)/get_genome-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='get_genome-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(get_genome_CFLAGS) $(CFLAGS) -c -o get_genome-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+get_genome-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(get_genome_CFLAGS) $(CFLAGS) -MT get_genome-filestring.obj -MD -MP -MF $(DEPDIR)/get_genome-filestring.Tpo -c -o get_genome-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/get_genome-filestring.Tpo $(DEPDIR)/get_genome-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='get_genome-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(get_genome_CFLAGS) $(CFLAGS) -c -o get_genome-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
get_genome-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(get_genome_CFLAGS) $(CFLAGS) -MT get_genome-iit-read-univ.o -MD -MP -MF $(DEPDIR)/get_genome-iit-read-univ.Tpo -c -o get_genome-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/get_genome-iit-read-univ.Tpo $(DEPDIR)/get_genome-iit-read-univ.Po
@@ -3636,6 +3727,20 @@ gmap-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -c -o gmap-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+gmap-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -MT gmap-filestring.o -MD -MP -MF $(DEPDIR)/gmap-filestring.Tpo -c -o gmap-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmap-filestring.Tpo $(DEPDIR)/gmap-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gmap-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -c -o gmap-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+gmap-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -MT gmap-filestring.obj -MD -MP -MF $(DEPDIR)/gmap-filestring.Tpo -c -o gmap-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmap-filestring.Tpo $(DEPDIR)/gmap-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gmap-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -c -o gmap-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
gmap-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -MT gmap-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gmap-iit-read-univ.Tpo -c -o gmap-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmap-iit-read-univ.Tpo $(DEPDIR)/gmap-iit-read-univ.Po
@@ -4490,6 +4595,20 @@ gmap-result.obj: result.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -c -o gmap-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi`
+gmap-output.o: output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -MT gmap-output.o -MD -MP -MF $(DEPDIR)/gmap-output.Tpo -c -o gmap-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmap-output.Tpo $(DEPDIR)/gmap-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='output.c' object='gmap-output.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -c -o gmap-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+
+gmap-output.obj: output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -MT gmap-output.obj -MD -MP -MF $(DEPDIR)/gmap-output.Tpo -c -o gmap-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmap-output.Tpo $(DEPDIR)/gmap-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='output.c' object='gmap-output.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -c -o gmap-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+
gmap-inbuffer.o: inbuffer.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_CFLAGS) $(CFLAGS) -MT gmap-inbuffer.o -MD -MP -MF $(DEPDIR)/gmap-inbuffer.Tpo -c -o gmap-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmap-inbuffer.Tpo $(DEPDIR)/gmap-inbuffer.Po
@@ -4770,6 +4889,20 @@ gmapindex-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapindex_CFLAGS) $(CFLAGS) -c -o gmapindex-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+gmapindex-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapindex_CFLAGS) $(CFLAGS) -MT gmapindex-filestring.o -MD -MP -MF $(DEPDIR)/gmapindex-filestring.Tpo -c -o gmapindex-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmapindex-filestring.Tpo $(DEPDIR)/gmapindex-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gmapindex-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapindex_CFLAGS) $(CFLAGS) -c -o gmapindex-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+gmapindex-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapindex_CFLAGS) $(CFLAGS) -MT gmapindex-filestring.obj -MD -MP -MF $(DEPDIR)/gmapindex-filestring.Tpo -c -o gmapindex-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmapindex-filestring.Tpo $(DEPDIR)/gmapindex-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gmapindex-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapindex_CFLAGS) $(CFLAGS) -c -o gmapindex-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
gmapindex-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapindex_CFLAGS) $(CFLAGS) -MT gmapindex-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gmapindex-iit-read-univ.Tpo -c -o gmapindex-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmapindex-iit-read-univ.Tpo $(DEPDIR)/gmapindex-iit-read-univ.Po
@@ -5344,6 +5477,20 @@ gmapl-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -c -o gmapl-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+gmapl-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -MT gmapl-filestring.o -MD -MP -MF $(DEPDIR)/gmapl-filestring.Tpo -c -o gmapl-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmapl-filestring.Tpo $(DEPDIR)/gmapl-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gmapl-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -c -o gmapl-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+gmapl-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -MT gmapl-filestring.obj -MD -MP -MF $(DEPDIR)/gmapl-filestring.Tpo -c -o gmapl-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmapl-filestring.Tpo $(DEPDIR)/gmapl-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gmapl-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -c -o gmapl-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
gmapl-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -MT gmapl-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gmapl-iit-read-univ.Tpo -c -o gmapl-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmapl-iit-read-univ.Tpo $(DEPDIR)/gmapl-iit-read-univ.Po
@@ -6198,6 +6345,20 @@ gmapl-result.obj: result.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -c -o gmapl-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi`
+gmapl-output.o: output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -MT gmapl-output.o -MD -MP -MF $(DEPDIR)/gmapl-output.Tpo -c -o gmapl-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmapl-output.Tpo $(DEPDIR)/gmapl-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='output.c' object='gmapl-output.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -c -o gmapl-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+
+gmapl-output.obj: output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -MT gmapl-output.obj -MD -MP -MF $(DEPDIR)/gmapl-output.Tpo -c -o gmapl-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmapl-output.Tpo $(DEPDIR)/gmapl-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='output.c' object='gmapl-output.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -c -o gmapl-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+
gmapl-inbuffer.o: inbuffer.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_CFLAGS) $(CFLAGS) -MT gmapl-inbuffer.o -MD -MP -MF $(DEPDIR)/gmapl-inbuffer.Tpo -c -o gmapl-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gmapl-inbuffer.Tpo $(DEPDIR)/gmapl-inbuffer.Po
@@ -6478,6 +6639,20 @@ gsnap-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+gsnap-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-filestring.o -MD -MP -MF $(DEPDIR)/gsnap-filestring.Tpo -c -o gsnap-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-filestring.Tpo $(DEPDIR)/gsnap-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gsnap-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+gsnap-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-filestring.obj -MD -MP -MF $(DEPDIR)/gsnap-filestring.Tpo -c -o gsnap-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-filestring.Tpo $(DEPDIR)/gsnap-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gsnap-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
gsnap-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gsnap-iit-read-univ.Tpo -c -o gsnap-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-iit-read-univ.Tpo $(DEPDIR)/gsnap-iit-read-univ.Po
@@ -6828,6 +7003,20 @@ gsnap-substring.obj: substring.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi`
+gsnap-junction.o: junction.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-junction.o -MD -MP -MF $(DEPDIR)/gsnap-junction.Tpo -c -o gsnap-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-junction.Tpo $(DEPDIR)/gsnap-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='junction.c' object='gsnap-junction.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+
+gsnap-junction.obj: junction.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-junction.obj -MD -MP -MF $(DEPDIR)/gsnap-junction.Tpo -c -o gsnap-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-junction.Tpo $(DEPDIR)/gsnap-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='junction.c' object='gsnap-junction.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+
gsnap-stage3hr.o: stage3hr.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-stage3hr.o -MD -MP -MF $(DEPDIR)/gsnap-stage3hr.Tpo -c -o gsnap-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-stage3hr.Tpo $(DEPDIR)/gsnap-stage3hr.Po
@@ -6842,20 +7031,6 @@ gsnap-stage3hr.obj: stage3hr.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi`
-gsnap-goby.o: goby.c
- at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-goby.o -MD -MP -MF $(DEPDIR)/gsnap-goby.Tpo -c -o gsnap-goby.o `test -f 'goby.c' || echo '$(srcdir)/'`goby.c
- at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-goby.Tpo $(DEPDIR)/gsnap-goby.Po
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='goby.c' object='gsnap-goby.o' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-goby.o `test -f 'goby.c' || echo '$(srcdir)/'`goby.c
-
-gsnap-goby.obj: goby.c
- at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-goby.obj -MD -MP -MF $(DEPDIR)/gsnap-goby.Tpo -c -o gsnap-goby.obj `if test -f 'goby.c'; then $(CYGPATH_W) 'goby.c'; else $(CYGPATH_W) '$(srcdir)/goby.c'; fi`
- at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-goby.Tpo $(DEPDIR)/gsnap-goby.Po
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='goby.c' object='gsnap-goby.obj' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-goby.obj `if test -f 'goby.c'; then $(CYGPATH_W) 'goby.c'; else $(CYGPATH_W) '$(srcdir)/goby.c'; fi`
-
gsnap-spanningelt.o: spanningelt.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-spanningelt.o -MD -MP -MF $(DEPDIR)/gsnap-spanningelt.Tpo -c -o gsnap-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-spanningelt.Tpo $(DEPDIR)/gsnap-spanningelt.Po
@@ -7346,6 +7521,20 @@ gsnap-bytecoding.obj: bytecoding.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-bytecoding.obj `if test -f 'bytecoding.c'; then $(CYGPATH_W) 'bytecoding.c'; else $(CYGPATH_W) '$(srcdir)/bytecoding.c'; fi`
+gsnap-univdiag.o: univdiag.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-univdiag.o -MD -MP -MF $(DEPDIR)/gsnap-univdiag.Tpo -c -o gsnap-univdiag.o `test -f 'univdiag.c' || echo '$(srcdir)/'`univdiag.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-univdiag.Tpo $(DEPDIR)/gsnap-univdiag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='univdiag.c' object='gsnap-univdiag.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-univdiag.o `test -f 'univdiag.c' || echo '$(srcdir)/'`univdiag.c
+
+gsnap-univdiag.obj: univdiag.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-univdiag.obj -MD -MP -MF $(DEPDIR)/gsnap-univdiag.Tpo -c -o gsnap-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-univdiag.Tpo $(DEPDIR)/gsnap-univdiag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='univdiag.c' object='gsnap-univdiag.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+
gsnap-sarray-read.o: sarray-read.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-sarray-read.o -MD -MP -MF $(DEPDIR)/gsnap-sarray-read.Tpo -c -o gsnap-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-sarray-read.Tpo $(DEPDIR)/gsnap-sarray-read.Po
@@ -7402,6 +7591,20 @@ gsnap-resulthr.obj: resulthr.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi`
+gsnap-output.o: output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-output.o -MD -MP -MF $(DEPDIR)/gsnap-output.Tpo -c -o gsnap-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-output.Tpo $(DEPDIR)/gsnap-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='output.c' object='gsnap-output.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+
+gsnap-output.obj: output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-output.obj -MD -MP -MF $(DEPDIR)/gsnap-output.Tpo -c -o gsnap-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-output.Tpo $(DEPDIR)/gsnap-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='output.c' object='gsnap-output.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -c -o gsnap-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+
gsnap-inbuffer.o: inbuffer.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_CFLAGS) $(CFLAGS) -MT gsnap-inbuffer.o -MD -MP -MF $(DEPDIR)/gsnap-inbuffer.Tpo -c -o gsnap-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnap-inbuffer.Tpo $(DEPDIR)/gsnap-inbuffer.Po
@@ -7682,6 +7885,20 @@ gsnapl-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+gsnapl-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-filestring.o -MD -MP -MF $(DEPDIR)/gsnapl-filestring.Tpo -c -o gsnapl-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-filestring.Tpo $(DEPDIR)/gsnapl-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gsnapl-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+gsnapl-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-filestring.obj -MD -MP -MF $(DEPDIR)/gsnapl-filestring.Tpo -c -o gsnapl-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-filestring.Tpo $(DEPDIR)/gsnapl-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='gsnapl-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
gsnapl-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gsnapl-iit-read-univ.Tpo -c -o gsnapl-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-iit-read-univ.Tpo $(DEPDIR)/gsnapl-iit-read-univ.Po
@@ -8032,6 +8249,20 @@ gsnapl-substring.obj: substring.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi`
+gsnapl-junction.o: junction.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-junction.o -MD -MP -MF $(DEPDIR)/gsnapl-junction.Tpo -c -o gsnapl-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-junction.Tpo $(DEPDIR)/gsnapl-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='junction.c' object='gsnapl-junction.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+
+gsnapl-junction.obj: junction.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-junction.obj -MD -MP -MF $(DEPDIR)/gsnapl-junction.Tpo -c -o gsnapl-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-junction.Tpo $(DEPDIR)/gsnapl-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='junction.c' object='gsnapl-junction.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+
gsnapl-stage3hr.o: stage3hr.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-stage3hr.o -MD -MP -MF $(DEPDIR)/gsnapl-stage3hr.Tpo -c -o gsnapl-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-stage3hr.Tpo $(DEPDIR)/gsnapl-stage3hr.Po
@@ -8046,20 +8277,6 @@ gsnapl-stage3hr.obj: stage3hr.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi`
-gsnapl-goby.o: goby.c
- at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-goby.o -MD -MP -MF $(DEPDIR)/gsnapl-goby.Tpo -c -o gsnapl-goby.o `test -f 'goby.c' || echo '$(srcdir)/'`goby.c
- at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-goby.Tpo $(DEPDIR)/gsnapl-goby.Po
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='goby.c' object='gsnapl-goby.o' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-goby.o `test -f 'goby.c' || echo '$(srcdir)/'`goby.c
-
-gsnapl-goby.obj: goby.c
- at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-goby.obj -MD -MP -MF $(DEPDIR)/gsnapl-goby.Tpo -c -o gsnapl-goby.obj `if test -f 'goby.c'; then $(CYGPATH_W) 'goby.c'; else $(CYGPATH_W) '$(srcdir)/goby.c'; fi`
- at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-goby.Tpo $(DEPDIR)/gsnapl-goby.Po
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='goby.c' object='gsnapl-goby.obj' libtool=no @AMDEPBACKSLASH@
- at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
- at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-goby.obj `if test -f 'goby.c'; then $(CYGPATH_W) 'goby.c'; else $(CYGPATH_W) '$(srcdir)/goby.c'; fi`
-
gsnapl-spanningelt.o: spanningelt.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-spanningelt.o -MD -MP -MF $(DEPDIR)/gsnapl-spanningelt.Tpo -c -o gsnapl-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-spanningelt.Tpo $(DEPDIR)/gsnapl-spanningelt.Po
@@ -8578,6 +8795,20 @@ gsnapl-resulthr.obj: resulthr.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi`
+gsnapl-output.o: output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-output.o -MD -MP -MF $(DEPDIR)/gsnapl-output.Tpo -c -o gsnapl-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-output.Tpo $(DEPDIR)/gsnapl-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='output.c' object='gsnapl-output.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+
+gsnapl-output.obj: output.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-output.obj -MD -MP -MF $(DEPDIR)/gsnapl-output.Tpo -c -o gsnapl-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-output.Tpo $(DEPDIR)/gsnapl-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='output.c' object='gsnapl-output.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -c -o gsnapl-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+
gsnapl-inbuffer.o: inbuffer.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_CFLAGS) $(CFLAGS) -MT gsnapl-inbuffer.o -MD -MP -MF $(DEPDIR)/gsnapl-inbuffer.Tpo -c -o gsnapl-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/gsnapl-inbuffer.Tpo $(DEPDIR)/gsnapl-inbuffer.Po
@@ -8844,6 +9075,20 @@ iit_dump-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_dump_CFLAGS) $(CFLAGS) -c -o iit_dump-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+iit_dump-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_dump_CFLAGS) $(CFLAGS) -MT iit_dump-filestring.o -MD -MP -MF $(DEPDIR)/iit_dump-filestring.Tpo -c -o iit_dump-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/iit_dump-filestring.Tpo $(DEPDIR)/iit_dump-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='iit_dump-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_dump_CFLAGS) $(CFLAGS) -c -o iit_dump-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+iit_dump-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_dump_CFLAGS) $(CFLAGS) -MT iit_dump-filestring.obj -MD -MP -MF $(DEPDIR)/iit_dump-filestring.Tpo -c -o iit_dump-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/iit_dump-filestring.Tpo $(DEPDIR)/iit_dump-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='iit_dump-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_dump_CFLAGS) $(CFLAGS) -c -o iit_dump-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
iit_dump-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_dump_CFLAGS) $(CFLAGS) -MT iit_dump-iit-read-univ.o -MD -MP -MF $(DEPDIR)/iit_dump-iit-read-univ.Tpo -c -o iit_dump-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/iit_dump-iit-read-univ.Tpo $(DEPDIR)/iit_dump-iit-read-univ.Po
@@ -9082,6 +9327,20 @@ iit_get-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_get_CFLAGS) $(CFLAGS) -c -o iit_get-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+iit_get-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_get_CFLAGS) $(CFLAGS) -MT iit_get-filestring.o -MD -MP -MF $(DEPDIR)/iit_get-filestring.Tpo -c -o iit_get-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/iit_get-filestring.Tpo $(DEPDIR)/iit_get-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='iit_get-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_get_CFLAGS) $(CFLAGS) -c -o iit_get-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+iit_get-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_get_CFLAGS) $(CFLAGS) -MT iit_get-filestring.obj -MD -MP -MF $(DEPDIR)/iit_get-filestring.Tpo -c -o iit_get-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/iit_get-filestring.Tpo $(DEPDIR)/iit_get-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='iit_get-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_get_CFLAGS) $(CFLAGS) -c -o iit_get-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
iit_get-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(iit_get_CFLAGS) $(CFLAGS) -MT iit_get-iit-read-univ.o -MD -MP -MF $(DEPDIR)/iit_get-iit-read-univ.Tpo -c -o iit_get-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/iit_get-iit-read-univ.Tpo $(DEPDIR)/iit_get-iit-read-univ.Po
@@ -9628,6 +9887,20 @@ sam_sort-uintlist.obj: uintlist.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sam_sort_CFLAGS) $(CFLAGS) -c -o sam_sort-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+sam_sort-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sam_sort_CFLAGS) $(CFLAGS) -MT sam_sort-filestring.o -MD -MP -MF $(DEPDIR)/sam_sort-filestring.Tpo -c -o sam_sort-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/sam_sort-filestring.Tpo $(DEPDIR)/sam_sort-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='sam_sort-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sam_sort_CFLAGS) $(CFLAGS) -c -o sam_sort-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+sam_sort-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sam_sort_CFLAGS) $(CFLAGS) -MT sam_sort-filestring.obj -MD -MP -MF $(DEPDIR)/sam_sort-filestring.Tpo -c -o sam_sort-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/sam_sort-filestring.Tpo $(DEPDIR)/sam_sort-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='sam_sort-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sam_sort_CFLAGS) $(CFLAGS) -c -o sam_sort-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
sam_sort-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sam_sort_CFLAGS) $(CFLAGS) -MT sam_sort-iit-read-univ.o -MD -MP -MF $(DEPDIR)/sam_sort-iit-read-univ.Tpo -c -o sam_sort-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/sam_sort-iit-read-univ.Tpo $(DEPDIR)/sam_sort-iit-read-univ.Po
@@ -9908,6 +10181,20 @@ snpindex-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(snpindex_CFLAGS) $(CFLAGS) -c -o snpindex-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+snpindex-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(snpindex_CFLAGS) $(CFLAGS) -MT snpindex-filestring.o -MD -MP -MF $(DEPDIR)/snpindex-filestring.Tpo -c -o snpindex-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/snpindex-filestring.Tpo $(DEPDIR)/snpindex-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='snpindex-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(snpindex_CFLAGS) $(CFLAGS) -c -o snpindex-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+snpindex-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(snpindex_CFLAGS) $(CFLAGS) -MT snpindex-filestring.obj -MD -MP -MF $(DEPDIR)/snpindex-filestring.Tpo -c -o snpindex-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/snpindex-filestring.Tpo $(DEPDIR)/snpindex-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='snpindex-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(snpindex_CFLAGS) $(CFLAGS) -c -o snpindex-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
snpindex-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(snpindex_CFLAGS) $(CFLAGS) -MT snpindex-iit-read-univ.o -MD -MP -MF $(DEPDIR)/snpindex-iit-read-univ.Tpo -c -o snpindex-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/snpindex-iit-read-univ.Tpo $(DEPDIR)/snpindex-iit-read-univ.Po
@@ -10370,6 +10657,20 @@ uniqscan-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+uniqscan-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-filestring.o -MD -MP -MF $(DEPDIR)/uniqscan-filestring.Tpo -c -o uniqscan-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscan-filestring.Tpo $(DEPDIR)/uniqscan-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='uniqscan-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+uniqscan-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-filestring.obj -MD -MP -MF $(DEPDIR)/uniqscan-filestring.Tpo -c -o uniqscan-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscan-filestring.Tpo $(DEPDIR)/uniqscan-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='uniqscan-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
uniqscan-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-iit-read-univ.o -MD -MP -MF $(DEPDIR)/uniqscan-iit-read-univ.Tpo -c -o uniqscan-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscan-iit-read-univ.Tpo $(DEPDIR)/uniqscan-iit-read-univ.Po
@@ -10706,6 +11007,20 @@ uniqscan-substring.obj: substring.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi`
+uniqscan-junction.o: junction.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-junction.o -MD -MP -MF $(DEPDIR)/uniqscan-junction.Tpo -c -o uniqscan-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscan-junction.Tpo $(DEPDIR)/uniqscan-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='junction.c' object='uniqscan-junction.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+
+uniqscan-junction.obj: junction.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-junction.obj -MD -MP -MF $(DEPDIR)/uniqscan-junction.Tpo -c -o uniqscan-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscan-junction.Tpo $(DEPDIR)/uniqscan-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='junction.c' object='uniqscan-junction.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+
uniqscan-stage3hr.o: stage3hr.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-stage3hr.o -MD -MP -MF $(DEPDIR)/uniqscan-stage3hr.Tpo -c -o uniqscan-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscan-stage3hr.Tpo $(DEPDIR)/uniqscan-stage3hr.Po
@@ -11196,6 +11511,20 @@ uniqscan-bytecoding.obj: bytecoding.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-bytecoding.obj `if test -f 'bytecoding.c'; then $(CYGPATH_W) 'bytecoding.c'; else $(CYGPATH_W) '$(srcdir)/bytecoding.c'; fi`
+uniqscan-univdiag.o: univdiag.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-univdiag.o -MD -MP -MF $(DEPDIR)/uniqscan-univdiag.Tpo -c -o uniqscan-univdiag.o `test -f 'univdiag.c' || echo '$(srcdir)/'`univdiag.c
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscan-univdiag.Tpo $(DEPDIR)/uniqscan-univdiag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='univdiag.c' object='uniqscan-univdiag.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-univdiag.o `test -f 'univdiag.c' || echo '$(srcdir)/'`univdiag.c
+
+uniqscan-univdiag.obj: univdiag.c
+ at am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-univdiag.obj -MD -MP -MF $(DEPDIR)/uniqscan-univdiag.Tpo -c -o uniqscan-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+ at am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscan-univdiag.Tpo $(DEPDIR)/uniqscan-univdiag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ source='univdiag.c' object='uniqscan-univdiag.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+
uniqscan-sarray-read.o: sarray-read.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-sarray-read.o -MD -MP -MF $(DEPDIR)/uniqscan-sarray-read.Tpo -c -o uniqscan-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscan-sarray-read.Tpo $(DEPDIR)/uniqscan-sarray-read.Po
@@ -11476,6 +11805,20 @@ uniqscanl-access.obj: access.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+uniqscanl-filestring.o: filestring.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-filestring.o -MD -MP -MF $(DEPDIR)/uniqscanl-filestring.Tpo -c -o uniqscanl-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscanl-filestring.Tpo $(DEPDIR)/uniqscanl-filestring.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='uniqscanl-filestring.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+uniqscanl-filestring.obj: filestring.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-filestring.obj -MD -MP -MF $(DEPDIR)/uniqscanl-filestring.Tpo -c -o uniqscanl-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscanl-filestring.Tpo $(DEPDIR)/uniqscanl-filestring.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='filestring.c' object='uniqscanl-filestring.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
uniqscanl-iit-read-univ.o: iit-read-univ.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-iit-read-univ.o -MD -MP -MF $(DEPDIR)/uniqscanl-iit-read-univ.Tpo -c -o uniqscanl-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscanl-iit-read-univ.Tpo $(DEPDIR)/uniqscanl-iit-read-univ.Po
@@ -11812,6 +12155,20 @@ uniqscanl-substring.obj: substring.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi`
+uniqscanl-junction.o: junction.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-junction.o -MD -MP -MF $(DEPDIR)/uniqscanl-junction.Tpo -c -o uniqscanl-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscanl-junction.Tpo $(DEPDIR)/uniqscanl-junction.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='junction.c' object='uniqscanl-junction.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+
+uniqscanl-junction.obj: junction.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-junction.obj -MD -MP -MF $(DEPDIR)/uniqscanl-junction.Tpo -c -o uniqscanl-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscanl-junction.Tpo $(DEPDIR)/uniqscanl-junction.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='junction.c' object='uniqscanl-junction.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+
uniqscanl-stage3hr.o: stage3hr.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-stage3hr.o -MD -MP -MF $(DEPDIR)/uniqscanl-stage3hr.Tpo -c -o uniqscanl-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/uniqscanl-stage3hr.Tpo $(DEPDIR)/uniqscanl-stage3hr.Po
diff --git a/src/access.c b/src/access.c
index 31e4cf5..9be6d68 100644
--- a/src/access.c
+++ b/src/access.c
@@ -1,9 +1,11 @@
-static char rcsid[] = "$Id: access.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: access.c 165967 2015-05-20 00:15:27Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "access.h"
+#include "list.h"
+#include "intlist.h"
#include <stdio.h>
#include <stddef.h>
@@ -13,15 +15,23 @@ static char rcsid[] = "$Id: access.c 153955 2014-11-24 17:54:45Z twu $";
/* <unistd.h> and <sys/types.h> included in access.h */
#include <sys/mman.h> /* For mmap */
+
+#define PROJECT_ID 42
+#include <sys/ipc.h>
+#include <sys/shm.h> /* For shmat and shmdt */
+#include <sys/sem.h> /* For semaphores */
+
+#ifdef USE_MPI
+#include <mpi.h>
+#endif
+
#ifdef HAVE_FCNTL_H
#include <fcntl.h> /* For open */
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h> /* For open and fstat */
#endif
-/* Not sure why this was included
-#include <errno.h>
-*/
+
#ifdef PAGESIZE_VIA_SYSCONF
#include <unistd.h>
#endif
@@ -264,93 +274,416 @@ first_nonzero_uint8 (size_t *i, char *filename) {
#endif
-#define FREAD_BATCH 100000000 /* 100 million elements at a time */
-/* Bigendian conversion not needed after this */
-void *
-Access_allocated (size_t *len, double *seconds, char *filename, size_t eltsize) {
+/************************************************************************
+ * Functions for shared memory and semaphores
+ ************************************************************************/
+
+static List_T shmem_memory = NULL;
+static Intlist_T shmem_ids = NULL;
+static Intlist_T semaphore_ids = NULL;
+
+
+#define SEMAPHORE_NA 0 /* For commands, like removal, where semnum
+ argument is ignored */
+#define SEMAPHORE_CREATION 0 /* -1 to lock, +1 to unlock */
+
+/* See if item is already in shared memory */
+static bool
+shmem_exists_p (int *shmid, key_t key) {
+ if ((*shmid = shmget(key,0,0)) == -1) {
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+static short
+shmem_nattached (int shmid) {
+ struct shmid_ds buf;
+
+ if (shmctl(shmid,IPC_STAT,&buf) == -1) {
+#if 0
+ fprintf(stderr,"Error in shmem_nattached with shmctl. Error %d: %s\n",
+ errno,strerror(errno));
+#endif
+ return -1;
+ } else {
+ return buf.shm_nattch;
+ }
+}
+
+
+void
+Access_shmem_remove (char *filename) {
+ key_t key;
+ int shmid, semid;
+ struct shmid_ds *buf = NULL;
+
+ key = ftok(filename,PROJECT_ID);
+ if (shmem_exists_p(&shmid,key) == false) {
+ /* Nothing to do */
+ } else if (shmctl(shmid,IPC_RMID,buf) == -1) {
+ fprintf(stderr,"Error with shmctl. Error %d: %s\n",errno,strerror(errno));
+ } else {
+ fprintf(stderr,"Successfully removed existing memory\n");
+ }
+
+ if ((semid = semget(key,/*nsems*/0,0)) == -1) {
+ /* Nothing to do */
+ } else {
+ fprintf(stderr,"Removing semaphore set %d\n",semid);
+ semctl(semid,SEMAPHORE_NA,IPC_RMID,NULL);
+ }
+
+ return;
+}
+
+static void
+semaphore_init (int semid, int sem_i, int value) {
+ union semun {
+ int val;
+ struct semid_ds *buf;
+ ushort *array;
+ } argument;
+
+ argument.val = value;
+ semctl(semid,sem_i,SETVAL,argument);
+ return;
+}
+
+#if 0
+static int
+semaphore_value (int semid, int sem_i) {
+ return semctl(semid,sem_i,GETVAL,NULL);
+}
+#endif
+
+#if 0
+static int
+semaphore_nwaiting (int semid, int sem_i) {
+ printf("nwaiting = %d\n",semctl(semid,sem_i,GETNCNT,NULL));
+ return semctl(semid,sem_i,GETNCNT,NULL);
+}
+#endif
+
+#if 0
+/* If already locked, then puts process to sleep */
+static void
+semaphore_lock (int semid) {
+ struct sembuf op;
+
+ /* printf("Process %d locking semaphore %d\n",getpid(),semid); */
+ op.sem_num = SEMAPHORE_CREATION;
+ op.sem_op = -1;
+ op.sem_flg = SEM_UNDO;
+ semop(semid,&op,1);
+
+ return;
+}
+#endif
+
+#if 0
+static int
+semaphore_unlock (int semid) {
+ struct sembuf op;
+
+ /* printf("Process %d unlocking semaphore %d\n",getpid(),semid); */
+ op.sem_num = SEMAPHORE_CREATION;
+ op.sem_op = +1;
+ op.sem_flg = SEM_UNDO;
+ semop(semid,&op,1);
+
+ /* printf("%d processes still waiting\n",semctl(semid,SEMAPHORE_CREATION,GETNCNT,NULL)); */
+
+ return semctl(semid,SEMAPHORE_CREATION,GETNCNT,NULL);
+}
+#endif
+
+
+/* Same as semaphore_lock */
+static void
+semaphore_wait (int semid) {
+ struct sembuf op;
+
+ /* printf("Process %d locking semaphore %d\n",getpid(),semid); */
+ op.sem_num = SEMAPHORE_CREATION;
+ op.sem_op = -1;
+ op.sem_flg = SEM_UNDO;
+ semop(semid,&op,1);
+
+ return;
+}
+
+
+void
+Access_controlled_cleanup () {
+ List_free(&shmem_memory);
+ Intlist_free(&shmem_ids);
+ Intlist_free(&semaphore_ids);
+ return;
+}
+
+
+void
+Access_emergency_cleanup () {
+ List_T p;
+ Intlist_T q;
void *memory;
-#ifdef CHECK
- void *memory2;
+ int shmid, semid;
+ int nattached;
+ struct shmid_ds *buf = NULL;
+
+ fprintf(stderr,"Calling Access_emergency_cleanup\n");
+ for (p = shmem_memory, q = shmem_ids; p != NULL; p = List_next(p), q = Intlist_next(q)) {
+ memory = List_head(p);
+ shmid = Intlist_head(q);
+
+ if (shmdt(memory) == -1) {
+#if 0
+ /* Somehow, shmdt forks and prints the error message and continues with the rest of the code */
+ fprintf(stderr,"Error in Access_emergency_cleanup with shmdt on memory %p, shmid %d. Error %d: %s\n",
+ memory,shmid,errno,strerror(errno));
#endif
- FILE *fp;
- Stopwatch_T stopwatch;
- void *p;
- size_t i;
+ }
- *len = (size_t) Access_filesize(filename);
- if (*len == 0) {
- *seconds = 0.0;
- return (void *) NULL;
+ if ((nattached = shmem_nattached(shmid)) > 0) {
+ fprintf(stderr,"For shmid %d, %d other processes still attached\n",shmid,(int) nattached);
+
+ } else if (shmctl(shmid,IPC_RMID,buf) == -1) {
+#if 0
+ fprintf(stderr,"Error in Access_emergency_cleanup with shmctl. Error %d: %s\n",
+ errno,strerror(errno));
+#endif
+ } else {
+ fprintf(stderr,"Removed existing memory for shmid %d\n",shmid);
+ }
}
- Stopwatch_start(stopwatch = Stopwatch_new());
- memory = (void *) MALLOC(*len);
+ for (q = semaphore_ids; q != NULL; q = Intlist_next(q)) {
+ /* Many of these removals will be for semaphores that don't exist */
+ semid = Intlist_head(q);
+ semctl(semid,/*semnum*/0,IPC_RMID,NULL);
+ }
+ Intlist_free(&semaphore_ids);
+
+ if (shmem_memory != NULL) {
+ fprintf(stderr,"\n");
+ fprintf(stderr,"You may want to run 'ipcs -m' to see if any shared memory segments are still in use\n");
+ fprintf(stderr,"You can remove a shared memory segment manually by doing 'ipcrm -m <shmid>'\n");
+ fprintf(stderr,"\n");
+ List_free(&shmem_memory);
+ Intlist_free(&shmem_ids);
+ }
-#ifdef CHECK
- memory2 = (void *) MALLOC(*len);
- if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
- fprintf(stderr,"Error: can't open file %s with fopen\n",filename);
- exit(9);
+ return;
+}
+
+
+void
+Access_deallocate (void *memory, int shmid) {
+ struct shmid_ds *buf = NULL;
+ short nattached;
+
+ if (shmdt(memory) == -1) {
+#if 0
+ /* Somehow, shmdt forks and prints the error message and continues with the rest of the code */
+ fprintf(stderr,"Error in Access_emergency_cleanup with shmdt on memory %p, shmid %d. Error %d: %s\n",
+ memory,shmid,errno,strerror(errno));
+#endif
}
- if (eltsize == 1) {
- FREAD_CHARS(memory2,(*len)/eltsize,fp);
- } else if (eltsize == 4) {
- FREAD_UINTS(memory2,(*len)/eltsize,fp);
- } else if (eltsize == 8) {
- FREAD_UINT8S(memory2,(*len)/eltsize,fp);
+ if ((nattached = shmem_nattached(shmid)) > 0) {
+ fprintf(stderr,"For shmid %d, %d processes still attached\n",shmid,(int) nattached);
+ } else if (shmctl(shmid,IPC_RMID,buf) == -1) {
+#if 0
+ /* Somehow, shmctl forks and prints the error message and continues with the rest of the code */
+ fprintf(stderr,"Error in Access_deallocate with shmctl. Error %d: %s\n",
+ errno,strerror(errno));
+#endif
} else {
- fprintf(stderr,"Access_allocated called with an element size of %d, which is not handled\n",(int) eltsize);
- exit(9);
+ fprintf(stderr,"Removed existing memory for shmid %d\n",shmid);
}
- fclose(fp);
-#endif
+
+ return;
+}
+
+#define FREAD_BATCH 100000000 /* 100 million elements at a time */
+
+static void
+copy_memory_from_file (void *memory, char *filename, size_t filesize, size_t eltsize) {
+ FILE *fp;
+ void *p;
+ size_t i;
+
if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
fprintf(stderr,"Error: can't open file %s with fopen\n",filename);
exit(9);
}
-
+
if (eltsize == 1) {
- for (i = 0; i + FREAD_BATCH < (*len)/eltsize; i += FREAD_BATCH) {
+ for (i = 0; i + FREAD_BATCH < filesize/eltsize; i += FREAD_BATCH) {
p = (void *) &(((unsigned char *) memory)[i]);
fread(p,sizeof(unsigned char),FREAD_BATCH,fp);
}
-
- if (i < (*len)/eltsize) {
+
+ if (i < filesize/eltsize) {
p = (void *) &(((unsigned char *) memory)[i]);
- fread(p,sizeof(unsigned char),(*len)/eltsize - i,fp);
+ fread(p,sizeof(unsigned char),filesize/eltsize - i,fp);
}
} else if (eltsize == 4) {
- for (i = 0; i + FREAD_BATCH < (*len)/eltsize; i += FREAD_BATCH) {
+ for (i = 0; i + FREAD_BATCH < filesize/eltsize; i += FREAD_BATCH) {
p = (void *) &(((UINT4 *) memory)[i]);
fread(p,sizeof(UINT4),FREAD_BATCH,fp);
}
- if (i < (*len)/eltsize) {
+ if (i < filesize/eltsize) {
p = (void *) &(((UINT4 *) memory)[i]);
- fread(p,sizeof(UINT4),(*len)/eltsize - i,fp);
+ fread(p,sizeof(UINT4),filesize/eltsize - i,fp);
}
} else if (eltsize == 8) {
- for (i = 0; i + FREAD_BATCH < (*len)/eltsize; i += FREAD_BATCH) {
+ for (i = 0; i + FREAD_BATCH < filesize/eltsize; i += FREAD_BATCH) {
p = (void *) &(((UINT8 *) memory)[i]);
fread(p,sizeof(UINT8),FREAD_BATCH,fp);
}
- if (i < (*len)/eltsize) {
+ if (i < filesize/eltsize) {
p = (void *) &(((UINT8 *) memory)[i]);
- fread(p,sizeof(UINT8),(*len)/eltsize - i,fp);
+ fread(p,sizeof(UINT8),filesize/eltsize - i,fp);
+ }
+
+ } else {
+ fprintf(stderr,"Access_allocated called with an element size of %d, which is not handled\n",(int) eltsize);
+ exit(9);
+ }
+ fclose(fp);
+
+ return;
+}
+
+
+static void *
+shmem_attach (int *shmid, char *filename, off_t filesize, size_t eltsize) {
+ void *memory = NULL;
+ key_t key;
+ int semid;
+
+ key = ftok(filename,PROJECT_ID);
+ if ((semid = semget(key,/*nsems*/1,IPC_CREAT | IPC_EXCL | 0666)) != -1) {
+ /* Usually, we would set the value to be 1. However, we can set
+ the value to 0, because this process won't perform semaphore_wait */
+ semaphore_init(semid,SEMAPHORE_CREATION,/*value*/0);
+
+ /* Store semid in case we abort in the middle of this procedure */
+ semaphore_ids = Intlist_push(semaphore_ids,semid);
+ } else if ((semid = semget(key,0,0)) == -1) {
+ fprintf(stderr,"Error in getting semaphore\n");
+ abort();
+ } else {
+ semaphore_wait(semid);
+ }
+
+ /* The process that created the semaphore will proceed, while the
+ others wait. They will be woken up when the semaphore is
+ removed. */
+
+ if ((*shmid = shmget(key,filesize,IPC_CREAT | IPC_EXCL | SHM_NORESERVE | 0666)) != -1) {
+ /* Created new shared memory */
+ if ((memory = shmat(*shmid,NULL,0)) == (void *) -1) {
+ fprintf(stderr,"Error with shmat. Error %d: %s\n",errno,strerror(errno));
+ } else {
+ shmem_memory = List_push(shmem_memory,memory);
+ shmem_ids = Intlist_push(shmem_ids,*shmid);
+ copy_memory_from_file(memory,filename,filesize,eltsize);
+ fprintf(stderr,"Attached new memory for %s...",filename);
}
+ } else if ((*shmid = shmget(key,0,0)) != -1) {
+ /* Found existing shared memory */
+ if ((memory = shmat(*shmid,NULL,0)) == (void *) -1) {
+ fprintf(stderr,"Error with shmat. Error %d: %s\n",errno,strerror(errno));
+ } else {
+ shmem_memory = List_push(shmem_memory,memory);
+ shmem_ids = Intlist_push(shmem_ids,*shmid);
+ fprintf(stderr,"Attached existing memory for %s...",filename);
+ }
+
+ } else {
+ fprintf(stderr,"Error with shmget. Error %d: %s\n",errno,strerror(errno));
+ abort();
+ }
+
+ /* The process that proceeded removes the semaphore here, allowing
+ the other processes to continue after their waits. The other
+ processes will try to remove the semaphore too, yielding an
+ error, which we simply ignore. */
+ semctl(semid,SEMAPHORE_NA,IPC_RMID,NULL);
+
+ return memory;
+}
+
+
+/* Bigendian conversion not needed after this */
+void *
+Access_allocate (int *shmid, size_t *len, double *seconds, char *filename, size_t eltsize, bool sharedp) {
+ void *memory;
+#ifdef CHECK
+ void *memory2;
+#endif
+#if 0 && defined (USE_MPI)
+ /* Does not work. Gets ftruncate error */
+ MPI_Comm comm;
+ MPI_Win win;
+#endif
+ Stopwatch_T stopwatch;
+
+ *len = (size_t) Access_filesize(filename);
+ if (*len == 0) {
+ *seconds = 0.0;
+ return (void *) NULL;
+ }
+
+ Stopwatch_start(stopwatch = Stopwatch_new());
+
+#ifdef CHECK
+ memory2 = (void *) MALLOC(*len);
+ if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
+ fprintf(stderr,"Error: can't open file %s with fopen\n",filename);
+ exit(9);
+ }
+
+ if (eltsize == 1) {
+ FREAD_CHARS(memory2,(*len)/eltsize,fp);
+ } else if (eltsize == 4) {
+ FREAD_UINTS(memory2,(*len)/eltsize,fp);
+ } else if (eltsize == 8) {
+ FREAD_UINT8S(memory2,(*len)/eltsize,fp);
} else {
fprintf(stderr,"Access_allocated called with an element size of %d, which is not handled\n",(int) eltsize);
exit(9);
}
fclose(fp);
+#endif
+
+ if (sharedp == true) {
+#if 0 && defined(USE_MPI)
+ /* Does not work. Gives ftruncate error */
+ MPI_Comm_split_type(MPI_COMM_WORLD,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,&comm);
+ MPI_Win_allocate_shared(*len,/*disp_unit*/1,MPI_INFO_NULL,comm,&memory,&win);
+ MPI_Win_free(&win);
+#else
+ memory = shmem_attach(&(*shmid),filename,/*filesize*/*len,eltsize);
+#endif
+ } else {
+ *shmid = 0;
+ memory = (void *) MALLOC(*len);
+ copy_memory_from_file(memory,filename,/*filesize*/*len,eltsize);
+ }
/* Note: the following (old non-batch mode) requires conversion to bigendian later, as needed */
/* fread(new->offsets,eltsize,sb.st_size/eltsize,fp); */
@@ -418,14 +751,18 @@ Access_mmap (int *fd, size_t *len, char *filename, size_t eltsize, bool randomp)
if ((*len = length = Access_filesize(filename)) == 0U) {
fprintf(stderr,"Warning: file %s is empty\n",filename);
- memory = NULL;
+ *fd = open(filename,O_RDONLY,0764); /* Still need to initialize value */
+ memory = (void *) NULL;
+
} else if ((*fd = open(filename,O_RDONLY,0764)) < 0) {
fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
exit(9);
+
} else if (sizeof(size_t) <= 4 && length > MAX32BIT) {
debug(printf("Too big to mmap\n"));
*len = 0;
- memory = NULL;
+ memory = (void *) NULL;
+
} else {
*len = (size_t) length;
memory = mmap(NULL,length,PROT_READ,0
@@ -438,12 +775,15 @@ Access_mmap (int *fd, size_t *len, char *filename, size_t eltsize, bool randomp)
#ifdef HAVE_MMAP_MAP_VARIABLE
|MAP_VARIABLE
#endif
+ /*|MAP_NORESERVE*/
,*fd,0);
+
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on len %jd from length %jd. Error %d: %s\n",
+ fprintf(stderr,"Error in access.c (1): Got mmap failure on len %jd from length %jd. Error %d: %s\n",
length,length,errno,strerror(errno));
debug(printf("Got MAP_FAILED on len %jd from length %jd\n",length,length));
- memory = NULL;
+ memory = (void *) NULL;
+
} else if (randomp == true) {
debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
@@ -451,6 +791,7 @@ Access_mmap (int *fd, size_t *len, char *filename, size_t eltsize, bool randomp)
madvise(memory,*len,MADV_RANDOM);
#endif
#endif
+
} else {
debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
@@ -491,7 +832,7 @@ Access_mmap_offset (int *remainder, int fd, off_t offset, size_t length, size_t
if (sizeof(size_t) <= 4 && length > MAX32BIT) {
debug(printf("Too big to mmap\n"));
- memory = NULL;
+ memory = (void *) NULL;
} else {
memory = mmap(NULL,length,PROT_READ,0
#ifdef HAVE_MMAP_MAP_SHARED
@@ -503,12 +844,15 @@ Access_mmap_offset (int *remainder, int fd, off_t offset, size_t length, size_t
#ifdef HAVE_MMAP_MAP_VARIABLE
|MAP_VARIABLE
#endif
+ /*|MAP_NORESERVE*/
,fd,offset);
+
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on fd %d, offset %jd, length %jd. Error %d: %s\n",
+ fprintf(stderr,"Error in access.c (2): Got mmap failure on fd %d, offset %jd, length %jd. Error %d: %s\n",
fd,offset,length,errno,strerror(errno));
debug(printf("Got MAP_FAILED on fd %d, offset %jd, length %zu\n",fd,offset,length));
- memory = NULL;
+ memory = (void *) NULL;
+
} else if (randomp == true) {
debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
@@ -516,6 +860,7 @@ Access_mmap_offset (int *remainder, int fd, off_t offset, size_t length, size_t
madvise(memory,length,MADV_RANDOM);
#endif
#endif
+
} else {
debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
@@ -548,19 +893,16 @@ Access_mmap_rw (int *fd, size_t *len, char *filename, size_t eltsize, bool rando
#endif
if ((*len = length = Access_filesize(filename)) == 0U) {
- fprintf(stderr,"Error: file %s is empty\n",filename);
- exit(9);
- }
-
- if ((*fd = open(filename,O_RDWR,0764)) < 0) {
+ fprintf(stderr,"Warning: file %s is empty\n",filename);
+ *fd = open(filename,O_RDWR,0764); /* Still need to initialize value */
+ memory = (void *) NULL;
+ } else if ((*fd = open(filename,O_RDWR,0764)) < 0) {
fprintf(stderr,"Error: can't open file %s with open for reading/writing\n",filename);
exit(9);
- }
-
- if (sizeof(size_t) <= 4 && length > MAX32BIT) {
+ } else if (sizeof(size_t) <= 4 && length > MAX32BIT) {
debug(printf("Too big to mmap\n"));
*len = 0;
- memory = NULL;
+ memory = (void *) NULL;
} else {
*len = (size_t) length;
memory = mmap(NULL,length,PROT_READ|PROT_WRITE,0
@@ -573,12 +915,15 @@ Access_mmap_rw (int *fd, size_t *len, char *filename, size_t eltsize, bool rando
#ifdef HAVE_MMAP_MAP_VARIABLE
|MAP_VARIABLE
#endif
+ /*|MAP_NORESERVE*/
,*fd,0);
+
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on len %jd from length %jd. Error %d: %s\n",
+ fprintf(stderr,"Error in access.c (3): Got mmap failure on len %jd from length %jd. Error %d: %s\n",
*len,length,errno,strerror(errno));
debug(printf("Got MAP_FAILED on len %zu from length %jd\n",*len,length));
- memory = NULL;
+ memory = (void *) NULL;
+
} else if (randomp == true) {
debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
@@ -586,6 +931,7 @@ Access_mmap_rw (int *fd, size_t *len, char *filename, size_t eltsize, bool rando
madvise(memory,*len,MADV_RANDOM);
#endif
#endif
+
} else {
debug(printf("Got mmap of %jd bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
@@ -624,7 +970,7 @@ Access_mmap_offset_rw (int *remainder, int fd, off_t offset, size_t length, size
if (sizeof(size_t) <= 4 && length > MAX32BIT) {
debug(printf("Too big to mmap\n"));
- memory = NULL;
+ memory = (void *) NULL;
} else {
memory = mmap(NULL,length,PROT_READ|PROT_WRITE,0
#ifdef HAVE_MMAP_MAP_SHARED
@@ -636,12 +982,15 @@ Access_mmap_offset_rw (int *remainder, int fd, off_t offset, size_t length, size
#ifdef HAVE_MMAP_MAP_VARIABLE
|MAP_VARIABLE
#endif
+ /*|MAP_NORESERVE*/
,fd,offset);
+
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on offset %jd, length %jd. Error %d: %s\n",
+ fprintf(stderr,"Error in access.c (4): Got mmap failure on offset %jd, length %jd. Error %d: %s\n",
offset,length,errno,strerror(errno));
debug(printf("Got MAP_FAILED on offset %jd, length %zu\n",offset,length));
- memory = NULL;
+ memory = (void *) NULL;
+
} else if (randomp == true) {
debug(printf("Got mmap of %zu bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
@@ -649,6 +998,7 @@ Access_mmap_offset_rw (int *remainder, int fd, off_t offset, size_t length, size
madvise(memory,length,MADV_RANDOM);
#endif
#endif
+
} else {
debug(printf("Got mmap of %zu bytes at %p to %p\n",length,memory,memory+length-1));
#ifdef HAVE_MADVISE
@@ -687,24 +1037,22 @@ Access_mmap_and_preload (int *fd, size_t *len, int *npages, double *seconds, cha
if ((*len = length = Access_filesize(filename)) == 0U) {
- fprintf(stderr,"Error: file %s is empty\n",filename);
- exit(9);
- }
+ fprintf(stderr,"Warning: file %s is empty\n",filename);
+ *fd = open(filename,O_RDONLY,0764); /* Still need to initialize value */
+ memory = (void *) NULL;
- if ((*fd = open(filename,O_RDONLY,0764)) < 0) {
+ } else if ((*fd = open(filename,O_RDONLY,0764)) < 0) {
fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
exit(9);
- }
- if (sizeof(size_t) <= 4 && *len > MAX32BIT) {
+ } else if (sizeof(size_t) <= 4 && *len > MAX32BIT) {
debug(printf("Too big to mmap\n"));
*len = 0;
*npages = 0;
*seconds = 0.0;
- memory = NULL;
+ memory = (void *) NULL;
} else {
-
pagesize = get_pagesize();
indicesperpage = pagesize/eltsize;
@@ -721,14 +1069,17 @@ Access_mmap_and_preload (int *fd, size_t *len, int *npages, double *seconds, cha
#ifdef HAVE_MMAP_MAP_VARIABLE
|MAP_VARIABLE
#endif
+ /*|MAP_NORESERVE*/
,*fd,0);
+
if (memory == MAP_FAILED) {
- fprintf(stderr,"Got mmap failure on len %jd from length %jd. Error %d: %s\n",
+ fprintf(stderr,"Error in access.c (5): Got mmap failure on len %jd from length %jd. Error %d: %s\n",
*len,length,errno,strerror(errno));
debug(printf("Got MAP_FAILED on len %jd from length %zu\n",*len,length));
- memory = NULL;
+ memory = (void *) NULL;
Stopwatch_stop(stopwatch);
Stopwatch_free(&stopwatch);
+
} else {
/* Touch all pages */
debug(printf("Got mmap of %zu bytes at %p to %p\n",length,memory,memory+length-1));
diff --git a/src/access.h b/src/access.h
index 0f9cf3f..f9cfefe 100644
--- a/src/access.h
+++ b/src/access.h
@@ -1,9 +1,8 @@
-/* $Id: access.h 77636 2012-10-26 00:14:01Z twu $ */
+/* $Id: access.h 161940 2015-03-25 20:36:59Z twu $ */
#ifndef ACCESS_INCLUDED
#define ACCESS_INCLUDED
-
#ifdef HAVE_CONFIG_H
-#include <config.h>
+#include <config.h> /* For HAVE_UNISTD_H, HAVE_SYS_TYPES_H, HAVE_CADDR_T */
#endif
#ifdef HAVE_UNISTD_H
@@ -13,11 +12,13 @@
#include <sys/types.h> /* For size_t, and for mmap and off_t */
#endif
+#include <sys/ipc.h> /* For key_t */
+
#include "bool.h"
/* ALLOCATED implies bigendian conversion already done */
typedef enum {USE_ALLOCATE, USE_MMAP_ONLY, USE_MMAP_PRELOAD, USE_FILEIO} Access_mode_T;
-typedef enum {ALLOCATED, MMAPPED, FILEIO} Access_T;
+typedef enum {ALLOCATED_PRIVATE, ALLOCATED_SHARED, MMAPPED, FILEIO} Access_T;
#define MAX32BIT 4294967295U /* 2^32 - 1 */
extern bool
@@ -38,8 +39,20 @@ Access_fileio (char *filename);
extern int
Access_fileio_rw (char *filename);
+extern void
+Access_controlled_cleanup ();
+
+extern void
+Access_emergency_cleanup ();
+
+extern void
+Access_shmem_remove (char *filename);
+
+extern void
+Access_deallocate (void *memory, int shmid);
+
extern void *
-Access_allocated (size_t *len, double *seconds, char *filename, size_t eltsize);
+Access_allocate (int *shmid, size_t *len, double *seconds, char *filename, size_t eltsize, bool sharedp);
#ifdef HAVE_CADDR_T
extern caddr_t
diff --git a/src/atoi.h b/src/atoi.h
index 7fdbcf0..b19fbc7 100644
--- a/src/atoi.h
+++ b/src/atoi.h
@@ -1,3 +1,4 @@
+/* $Id: atoi.h 157222 2015-01-22 18:40:00Z twu $ */
#ifndef ATOI_INCLUDED
#define ATOI_INCLUDED
diff --git a/src/atoiindex.c b/src/atoiindex.c
index 914f03d..13344fa 100644
--- a/src/atoiindex.c
+++ b/src/atoiindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: atoiindex.c 142098 2014-07-22 03:11:00Z twu $";
+static char rcsid[] = "$Id: atoiindex.c 167263 2015-06-10 23:59:15Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -100,6 +100,7 @@ static struct option long_options[] = {
{"sampling", required_argument, 0, 'q'}, /* required_interval */
{"db", required_argument, 0, 'd'}, /* dbroot */
{"usesnps", required_argument, 0, 'v'}, /* snps_root */
+ {"build-sarray", required_argument, 0, 0}, /* build_suffix_array_p */
/* Help options */
{"version", no_argument, 0, 0}, /* print_program_version */
@@ -769,6 +770,7 @@ main (int argc, char *argv[]) {
UINT4 *ref_positions4;
Oligospace_T oligospace;
bool coord_values_8p;
+ int shmid;
/* For suffix array */
Univcoord_T genomelength;
@@ -815,6 +817,17 @@ main (int argc, char *argv[]) {
} else if (!strcmp(long_name,"help")) {
print_program_usage();
exit(0);
+
+ } else if (!strcmp(long_name,"build-sarray")) {
+ if (!strcmp(optarg,"0")) {
+ build_suffix_array_p = false;
+ } else if (!strcmp(optarg,"1")) {
+ build_suffix_array_p = true;
+ } else {
+ fprintf(stderr,"Argument to --build-sarray must be 0 or 1\n");
+ exit(9);
+ }
+
} else {
/* Shouldn't reach here */
fprintf(stderr,"Don't recognize option %s. For usage, run 'atoiindex --help'",long_name);
@@ -890,10 +903,12 @@ main (int argc, char *argv[]) {
ref_positions8_low = (UINT4 *) Access_mmap(&ref_positions_low_fd,&ref_positions_low_len,
filenames->positions_low_filename,sizeof(UINT4),/*randomp*/false);
#else
- ref_positions8_high = (unsigned char *) Access_allocated(&ref_positions_high_len,&seconds,
- filenames->positions_high_filename,sizeof(unsigned char));
- ref_positions8_low = (UINT4 *) Access_allocated(&ref_positions_low_len,&seconds,
- filenames->positions_low_filename,sizeof(UINT4));
+ ref_positions8_high = (unsigned char *) Access_allocate(&shmid,&ref_positions_high_len,&seconds,
+ filenames->positions_high_filename,sizeof(unsigned char),
+ /*sharedp*/false);
+ ref_positions8_low = (UINT4 *) Access_allocate(&shmid,&ref_positions_low_len,&seconds,
+ filenames->positions_low_filename,sizeof(UINT4),
+ /*sharedp*/false);
#endif
/* Unpack */
totalcounts = ref_positions_high_len/sizeof(unsigned char);
@@ -920,8 +935,9 @@ main (int argc, char *argv[]) {
ref_positions4 = (UINT4 *) Access_mmap(&ref_positions_low_fd,&ref_positions_low_len,
filenames->positions_low_filename,sizeof(UINT4),/*randomp*/false);
#else
- ref_positions4 = (UINT4 *) Access_allocated(&ref_positions_low_len,&seconds,
- filenames->positions_low_filename,sizeof(UINT4));
+ ref_positions4 = (UINT4 *) Access_allocate(&shmid,&ref_positions_low_len,&seconds,
+ filenames->positions_low_filename,sizeof(UINT4),
+ /*sharedp*/false);
#endif
}
@@ -971,7 +987,7 @@ main (int argc, char *argv[]) {
sarrayfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".a2iag.sarray")+1,sizeof(char));
sprintf(sarrayfile,"%s/%s.a2iag.sarray",destdir,fileroot);
genomecomp = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
gbuffer = (unsigned char *) CALLOC(genomelength+1,sizeof(unsigned char));
Genome_fill_buffer_int_string(genomecomp,/*left*/0,/*length*/genomelength,gbuffer,ag_conversion);
gbuffer[genomelength] = 0; /* Tried N/X, but SACA_K fails */
@@ -993,7 +1009,7 @@ main (int argc, char *argv[]) {
/* Not needed if we already have gbuffer */
/* Required for computing LCP, but uses non-SIMD instructions */
genomebits = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
Genome_hr_setup(Genome_blocks(genomebits),/*snp_blocks*/NULL,
/*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/false,
/*mode*/ATOI_STRANDED);
@@ -1025,12 +1041,13 @@ main (int argc, char *argv[]) {
/* Assume we have lcp_bytes already in memory. Don't need to use guide for speed. */
lcpguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".a2iag.salcpguide1024")+1,sizeof(char));
sprintf(lcpguidefile,"%s/%s.a2iag.salcpguide1024",destdir,fileroot);
- lcp_guide = (UINT4 *) Access_allocated(&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4));
+ lcp_guide = (UINT4 *) Access_allocate(&shmid,&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4),
+ /*sharedp*/false);
FREE(lcpguidefile);
lcpexcfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".a2iag.salcpexc")+1,sizeof(char));
sprintf(lcpexcfile,"%s/%s.a2iag.salcpexc",destdir,fileroot);
- lcp_exceptions = (UINT4 *) Access_allocated(&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4));
+ lcp_exceptions = (UINT4 *) Access_allocate(&shmid,&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4),/*sharedp*/false);
n_lcp_exceptions = lcpexc_len/(sizeof(UINT4) + sizeof(UINT4));
FREE(lcpexcfile);
@@ -1134,7 +1151,7 @@ main (int argc, char *argv[]) {
/* Not needed if we already have gbuffer */
/* Required for computing LCP, but uses non-SIMD instructions */
genomebits = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
Genome_hr_setup(Genome_blocks(genomebits),/*snp_blocks*/NULL,
/*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/false,
/*mode*/ATOI_STRANDED);
@@ -1166,12 +1183,12 @@ main (int argc, char *argv[]) {
/* Assume we have lcp_bytes already in memory. Don't need to use guide for speed. */
lcpguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".a2itc.salcpguide1024")+1,sizeof(char));
sprintf(lcpguidefile,"%s/%s.a2itc.salcpguide1024",destdir,fileroot);
- lcp_guide = (UINT4 *) Access_allocated(&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4));
+ lcp_guide = (UINT4 *) Access_allocate(&shmid,&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4),/*sharedp*/false);
FREE(lcpguidefile);
lcpexcfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".a2itc.salcpexc")+1,sizeof(char));
sprintf(lcpexcfile,"%s/%s.a2itc.salcpexc",destdir,fileroot);
- lcp_exceptions = (UINT4 *) Access_allocated(&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4));
+ lcp_exceptions = (UINT4 *) Access_allocate(&shmid,&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4),/*sharedp*/false);
n_lcp_exceptions = lcpexc_len/(sizeof(UINT4) + sizeof(UINT4));
FREE(lcpexcfile);
diff --git a/src/bigendian.h b/src/bigendian.h
index 1fea066..7c0528f 100644
--- a/src/bigendian.h
+++ b/src/bigendian.h
@@ -1,8 +1,8 @@
-/* $Id: bigendian.h 99737 2013-06-27 19:33:03Z twu $ */
+/* $Id: bigendian.h 157223 2015-01-22 18:43:01Z twu $ */
#ifndef BIGENDIAN_INCLUDED
#define BIGENDIAN_INCLUDED
#ifdef HAVE_CONFIG_H
-#include <config.h>
+#include <config.h> /* For HAVE_64_BIT */
#endif
#include <stdio.h>
diff --git a/src/bitpack64-write.h b/src/bitpack64-write.h
index 1372f13..1688c30 100644
--- a/src/bitpack64-write.h
+++ b/src/bitpack64-write.h
@@ -1,9 +1,10 @@
+/* $Id: bitpack64-write.h 165968 2015-05-20 00:15:38Z twu $ */
#ifndef BITPACK64_WRITE_INCLUDED
#define BITPACK64_WRITE_INCLUDED
#include <stdio.h>
#include "types.h"
-/* Stores values 0..n */
+/* Stores the $(n+1)$ values [0..n] */
extern void
Bitpack64_write_differential (char *ptrsfile, char *compfile, UINT4 *ascending, UINT4 n);
extern void
@@ -17,7 +18,7 @@ extern void
Bitpack64_write_fixed10_huge (char *pagesfile, char *ptrsfile, char *compfile,
UINT8 *ascending, UINT4 n);
-/* Stores values 0..(n-1) */
+/* Stores the $n$ values [0..(n-1)] */
extern void
Bitpack64_write_direct (char *ptrsfile, char *compfile, UINT4 *direct, UINT4 n);
diff --git a/src/block.h b/src/block.h
index eca6ffb..a6dc69e 100644
--- a/src/block.h
+++ b/src/block.h
@@ -1,6 +1,7 @@
-/* $Id: block.h 99748 2013-06-27 21:01:48Z twu $ */
+/* $Id: block.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef BLOCK_INCLUDED
#define BLOCK_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "indexdb.h"
diff --git a/src/bool.h b/src/bool.h
index ee6d862..94e4222 100644
--- a/src/bool.h
+++ b/src/bool.h
@@ -1,10 +1,14 @@
-/* $Id: bool.h 40271 2011-05-28 02:29:18Z twu $ */
+/* $Id: bool.h 155282 2014-12-12 19:42:54Z twu $ */
#ifndef BOOL_INCLUDED
#define BOOL_INCLUDED
/* typedef enum{false,true} bool; */
typedef unsigned char bool;
+#ifdef USE_MPI
+#define MPI_BOOL_T MPI_UNSIGNED_CHAR
+#endif
+
#define false 0
#define true 1
diff --git a/src/boyer-moore.h b/src/boyer-moore.h
index c3c2edf..bce2872 100644
--- a/src/boyer-moore.h
+++ b/src/boyer-moore.h
@@ -1,6 +1,7 @@
-/* $Id: boyer-moore.h 145990 2014-08-25 21:47:32Z twu $ */
+/* $Id: boyer-moore.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef BOYER_MOORE_INCLUDED
#define BOYER_MOORE_INCLUDED
+
#include "intlist.h"
#include "genomicpos.h"
diff --git a/src/bytecoding.h b/src/bytecoding.h
index 39ec4be..089e93c 100644
--- a/src/bytecoding.h
+++ b/src/bytecoding.h
@@ -1,6 +1,7 @@
-/* $Id: bytecoding.h 132144 2014-04-02 16:02:28Z twu $ */
+/* $Id: bytecoding.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef BYTECODING_INCLUDED
#define BYTECODING_INCLUDED
+
#include "bool.h"
#include "types.h"
diff --git a/src/bzip2.h b/src/bzip2.h
index 3c56fe5..194fc38 100644
--- a/src/bzip2.h
+++ b/src/bzip2.h
@@ -1,6 +1,7 @@
-/* $Id: bzip2.h 83593 2013-01-16 22:59:40Z twu $ */
+/* $Id: bzip2.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef BZIP2_INCLUDED
#define BZIP2_INCLUDED
+
#include "bool.h"
#define T Bzip2_T
diff --git a/src/chimera.c b/src/chimera.c
index 95f7901..740019e 100644
--- a/src/chimera.c
+++ b/src/chimera.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: chimera.c 164705 2015-05-01 20:26:27Z twu $";
+static char rcsid[] = "$Id: chimera.c 162196 2015-03-27 21:42:43Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -107,7 +107,7 @@ Chimera_cdna_direction (T this) {
void
-Chimera_print_sam_tag (FILE *fp, T this, Univ_IIT_T chromosome_iit) {
+Chimera_print_sam_tag (Filestring_T fp, T this, Univ_IIT_T chromosome_iit) {
char donor_strand, acceptor_strand;
char *donor_chr, *acceptor_chr;
bool alloc1p, alloc2p;
@@ -136,14 +136,14 @@ Chimera_print_sam_tag (FILE *fp, T this, Univ_IIT_T chromosome_iit) {
}
}
- fprintf(fp,"%c%c-%c%c,%.2f,%.2f",
+ FPRINTF(fp,"%c%c-%c%c,%.2f,%.2f",
this->donor1,this->donor2,this->acceptor2,this->acceptor1,this->donor_prob,this->acceptor_prob);
donor_chr = Univ_IIT_label(chromosome_iit,Stage3_chrnum(this->from),&alloc1p);
acceptor_chr = Univ_IIT_label(chromosome_iit,Stage3_chrnum(this->to),&alloc2p);
- fprintf(fp,",%c%s@%u..%c%s@%u",
+ FPRINTF(fp,",%c%s@%u..%c%s@%u",
donor_strand,donor_chr,Stage3_chrend(this->from),
acceptor_strand,acceptor_chr,Stage3_chrstart(this->to));
- fprintf(fp,",%d..%d",this->chimerapos+1,this->equivpos+1);
+ FPRINTF(fp,",%d..%d",this->chimerapos+1,this->equivpos+1);
if (alloc2p == true) {
FREE(acceptor_chr);
}
@@ -209,21 +209,21 @@ Chimera_free (T *old) {
void
-Chimera_print (FILE *fp, T this) {
+Chimera_print (Filestring_T fp, T this) {
if (this->exonexonpos > 0) {
- fprintf(fp," *** Possible chimera with exon-exon boundary");
+ FPRINTF(fp," *** Possible chimera with exon-exon boundary");
if (this->cdna_direction > 0) {
- fprintf(fp," (sense)");
+ FPRINTF(fp," (sense)");
} else if (this->cdna_direction < 0) {
- fprintf(fp," (antisense)");
+ FPRINTF(fp," (antisense)");
}
- fprintf(fp," at %d (dinucl = %c%c-%c%c, donor_prob = %.3f, acceptor_prob = %.3f)",
+ FPRINTF(fp," at %d (dinucl = %c%c-%c%c, donor_prob = %.3f, acceptor_prob = %.3f)",
this->exonexonpos+1,this->donor1,this->donor2,this->acceptor2,this->acceptor1,
this->donor_prob,this->acceptor_prob);
} else if (this->equivpos == this->chimerapos) {
- fprintf(fp," *** Possible chimera with breakpoint at %d",this->chimerapos+1);
+ FPRINTF(fp," *** Possible chimera with breakpoint at %d",this->chimerapos+1);
} else {
- fprintf(fp," *** Possible chimera with breakpoint at %d..%d",this->chimerapos+1,this->equivpos+1);
+ FPRINTF(fp," *** Possible chimera with breakpoint at %d..%d",this->chimerapos+1,this->equivpos+1);
}
return;
@@ -520,7 +520,7 @@ Chimera_bestpath (int *five_score, int *three_score, int *chimerapos, int *chime
for (pos = 0; pos < queryntlength; pos++) {
bestscoreatpos[pos] = NEG_INFINITY;
}
- debug(printf("npaths_sub1 = %d, npaths_sub2 = %d\n",npaths_sub1,npaths_sub2));
+ debug4(printf("npaths_sub1 = %d, npaths_sub2 = %d\n",npaths_sub1,npaths_sub2));
for (i = 0; i < npaths_sub1; i++) {
for (j = 0; j < npaths_sub2; j++) {
if (stage3array_sub1[i] == stage3array_sub2[j]) {
@@ -529,6 +529,7 @@ Chimera_bestpath (int *five_score, int *three_score, int *chimerapos, int *chime
/* Not joinable */
} else {
for (pos = 0; pos < queryntlength - 1; pos++) {
+ debug4(printf("pos %d, gapp %d and %d\n",pos,gapp_sub1[i][pos],gapp_sub2[j][pos]));
if (gapp_sub1[i][pos] == false && gapp_sub2[j][pos+1] == false) {
#if 0
score = matrix_sub2[j][queryntlength-1] - matrix_sub2[j][pos] + matrix_sub1[i][pos] /* - 0 */;
@@ -536,6 +537,7 @@ Chimera_bestpath (int *five_score, int *three_score, int *chimerapos, int *chime
/* For new Pair_pairscores computation */
score = matrix_sub1[i][pos] + matrix_sub2[j][pos];
#endif
+ debug4(printf("score %d\n",score));
if (score > bestscoreatpos[pos]) {
bestscoreatpos[pos] = score;
from[pos] = i;
@@ -567,7 +569,7 @@ Chimera_bestpath (int *five_score, int *three_score, int *chimerapos, int *chime
*three_score = matrix_sub2[*bestto][*chimerapos];
#endif
- debug(
+ debug4(
for (pos = 0; pos < queryntlength - 1; pos++) {
printf("%d:",pos);
for (i = 0; i < npaths_sub1; i++) {
@@ -1009,7 +1011,7 @@ Chimera_find_exonexon (int *found_cdna_direction, int *try_cdna_direction,
char *comp, bool *donor_watsonp, bool *acceptor_watsonp, double *donor_prob, double *acceptor_prob,
Stage3_T left_part, Stage3_T right_part, Genome_T genome, Genome_T genomealt,
Univ_IIT_T chromosome_iit, int breakpoint_start, int breakpoint_end) {
- int exonexonpos_fwd, exonexonpos_rev, temp;
+ int exonexonpos_fwd, exonexonpos_rev;
char donor1_fwd, donor2_fwd, acceptor2_fwd, acceptor1_fwd,
donor1_rev, donor2_rev, acceptor2_rev, acceptor1_rev;
char comp_fwd, comp_rev;
diff --git a/src/chimera.h b/src/chimera.h
index 8f6116a..2eb0a36 100644
--- a/src/chimera.h
+++ b/src/chimera.h
@@ -1,4 +1,4 @@
-/* $Id: chimera.h 156812 2015-01-15 20:55:07Z twu $ */
+/* $Id: chimera.h 156811 2015-01-15 20:51:29Z twu $ */
#ifndef CHIMERA_INCLUDED
#define CHIMERA_INCLUDED
@@ -9,6 +9,7 @@ typedef struct Chimera_T *Chimera_T;
#include "genome.h"
#include "stage3.h"
#include "iit-read-univ.h"
+#include "filestring.h"
#define T Chimera_T
@@ -24,7 +25,7 @@ Chimera_equivpos (T this);
extern int
Chimera_cdna_direction (T this);
extern void
-Chimera_print_sam_tag (FILE *fp, T this, Univ_IIT_T chromosome_iit);
+Chimera_print_sam_tag (Filestring_T fp, T this, Univ_IIT_T chromosome_iit);
extern double
Chimera_donor_prob (T this);
extern double
@@ -39,7 +40,7 @@ Chimera_new (Stage3_T from, Stage3_T to, int chimerapos, int chimeraequivpos,
extern void
Chimera_free (T *old);
extern void
-Chimera_print (FILE *fp, T this);
+Chimera_print (Filestring_T fp, T this);
extern int
Chimera_alignment_break (int *newstart, int *newend, Stage3_T stage3, int queryntlength, double fthreshold);
diff --git a/src/chrom.h b/src/chrom.h
index f01a972..2d75b08 100644
--- a/src/chrom.h
+++ b/src/chrom.h
@@ -1,6 +1,7 @@
-/* $Id: chrom.h 138522 2014-06-09 17:08:44Z twu $ */
+/* $Id: chrom.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef CHROM_INCLUDED
#define CHROM_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "types.h"
diff --git a/src/cmet.h b/src/cmet.h
index 68ecdd5..6e57b6c 100644
--- a/src/cmet.h
+++ b/src/cmet.h
@@ -1,3 +1,4 @@
+/* $Id: cmet.h 157222 2015-01-22 18:40:00Z twu $ */
#ifndef CMET_INCLUDED
#define CMET_INCLUDED
diff --git a/src/cmetindex.c b/src/cmetindex.c
index 4c7b850..d44890d 100644
--- a/src/cmetindex.c
+++ b/src/cmetindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: cmetindex.c 142098 2014-07-22 03:11:00Z twu $";
+static char rcsid[] = "$Id: cmetindex.c 167263 2015-06-10 23:59:15Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -101,6 +101,7 @@ static struct option long_options[] = {
{"sampling", required_argument, 0, 'q'}, /* required_interval */
{"db", required_argument, 0, 'd'}, /* dbroot */
{"usesnps", required_argument, 0, 'v'}, /* snps_root */
+ {"build-sarray", required_argument, 0, 0}, /* build_suffix_array_p */
/* Help options */
{"version", no_argument, 0, 0}, /* print_program_version */
@@ -770,6 +771,7 @@ main (int argc, char *argv[]) {
UINT4 *ref_positions4;
Oligospace_T oligospace;
bool coord_values_8p;
+ int shmid;
/* For suffix array */
Univcoord_T genomelength;
@@ -816,6 +818,17 @@ main (int argc, char *argv[]) {
} else if (!strcmp(long_name,"help")) {
print_program_usage();
exit(0);
+
+ } else if (!strcmp(long_name,"build-sarray")) {
+ if (!strcmp(optarg,"0")) {
+ build_suffix_array_p = false;
+ } else if (!strcmp(optarg,"1")) {
+ build_suffix_array_p = true;
+ } else {
+ fprintf(stderr,"Argument to --build-sarray must be 0 or 1\n");
+ exit(9);
+ }
+
} else {
/* Shouldn't reach here */
fprintf(stderr,"Don't recognize option %s. For usage, run 'cmetindex --help'",long_name);
@@ -891,10 +904,11 @@ main (int argc, char *argv[]) {
ref_positions8_low = (UINT4 *) Access_mmap(&ref_positions_low_fd,&ref_positions_low_len,
filenames->positions_low_filename,sizeof(UINT4),/*randomp*/false);
#else
- ref_positions8_high = (unsigned char *) Access_allocated(&ref_positions_high_len,&seconds,
- filenames->positions_high_filename,sizeof(unsigned char));
- ref_positions8_low = (UINT4 *) Access_allocated(&ref_positions_low_len,&seconds,
- filenames->positions_low_filename,sizeof(UINT4));
+ ref_positions8_high = (unsigned char *) Access_allocate(&shmid,&ref_positions_high_len,&seconds,
+ filenames->positions_high_filename,sizeof(unsigned char),
+ /*sharedp*/false);
+ ref_positions8_low = (UINT4 *) Access_allocate(&shmid,&ref_positions_low_len,&seconds,
+ filenames->positions_low_filename,sizeof(UINT4),/*sharedp*/false);
#endif
/* Unpack */
totalcounts = ref_positions_high_len/sizeof(unsigned char);
@@ -921,8 +935,8 @@ main (int argc, char *argv[]) {
ref_positions4 = (UINT4 *) Access_mmap(&ref_positions_low_fd,&ref_positions_low_len,
filenames->positions_low_filename,sizeof(UINT4),/*randomp*/false);
#else
- ref_positions4 = (UINT4 *) Access_allocated(&ref_positions_low_len,&seconds,
- filenames->positions_low_filename,sizeof(UINT4));
+ ref_positions4 = (UINT4 *) Access_allocate(&shmid,&ref_positions_low_len,&seconds,
+ filenames->positions_low_filename,sizeof(UINT4),/*sharedp*/false);
#endif
}
@@ -973,7 +987,7 @@ main (int argc, char *argv[]) {
sarrayfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".metct.sarray")+1,sizeof(char));
sprintf(sarrayfile,"%s/%s.metct.sarray",destdir,fileroot);
genomecomp = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
gbuffer = (unsigned char *) CALLOC(genomelength+1,sizeof(unsigned char));
Genome_fill_buffer_int_string(genomecomp,/*left*/0,/*length*/genomelength,gbuffer,ct_conversion);
@@ -996,7 +1010,7 @@ main (int argc, char *argv[]) {
/* Not needed if we already have gbuffer */
/* Required for computing LCP, but uses non-SIMD instructions */
genomebits = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
Genome_hr_setup(Genome_blocks(genomebits),/*snp_blocks*/NULL,
/*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/false,
/*mode*/CMET_STRANDED);
@@ -1028,12 +1042,12 @@ main (int argc, char *argv[]) {
/* Assume we have lcp_bytes already in memory. Don't need to use guide for speed. */
lcpguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".metct.salcpguide1024")+1,sizeof(char));
sprintf(lcpguidefile,"%s/%s.metct.salcpguide1024",destdir,fileroot);
- lcp_guide = (UINT4 *) Access_allocated(&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4));
+ lcp_guide = (UINT4 *) Access_allocate(&shmid,&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4),/*sharedp*/false);
FREE(lcpguidefile);
lcpexcfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".metct.salcpexc")+1,sizeof(char));
sprintf(lcpexcfile,"%s/%s.metct.salcpexc",destdir,fileroot);
- lcp_exceptions = (UINT4 *) Access_allocated(&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4));
+ lcp_exceptions = (UINT4 *) Access_allocate(&shmid,&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4),/*sharedp*/false);
n_lcp_exceptions = lcpexc_len/(sizeof(UINT4) + sizeof(UINT4));
FREE(lcpexcfile);
@@ -1168,12 +1182,12 @@ main (int argc, char *argv[]) {
/* Assume we have lcp_bytes already in memory. Don't need to use guide for speed. */
lcpguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".metga.salcpguide1024")+1,sizeof(char));
sprintf(lcpguidefile,"%s/%s.metga.salcpguide1024",destdir,fileroot);
- lcp_guide = (UINT4 *) Access_allocated(&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4));
+ lcp_guide = (UINT4 *) Access_allocate(&shmid,&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4),/*sharedp*/false);
FREE(lcpguidefile);
lcpexcfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".metga.salcpexc")+1,sizeof(char));
sprintf(lcpexcfile,"%s/%s.metga.salcpexc",destdir,fileroot);
- lcp_exceptions = (UINT4 *) Access_allocated(&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4));
+ lcp_exceptions = (UINT4 *) Access_allocate(&shmid,&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4),/*sharedp*/false);
n_lcp_exceptions = lcpexc_len/(sizeof(UINT4) + sizeof(UINT4));
FREE(lcpexcfile);
diff --git a/src/compress.c b/src/compress.c
index a323cdd..e6e0b6e 100644
--- a/src/compress.c
+++ b/src/compress.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: compress.c 137996 2014-06-04 01:58:17Z twu $";
+static char rcsid[] = "$Id: compress.c 157566 2015-01-28 00:02:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1295,7 +1295,7 @@ Compress_shift (T this, int nshift) {
}
#else
-/* HAVE_SSE2 but not SSE3 */
+/* HAVE_SSE2 but not SSSE3 */
Genomecomp_T *
Compress_shift (T this, int nshift) {
Genomecomp_T *shifted;
diff --git a/src/compress.h b/src/compress.h
index 3a60527..41be273 100644
--- a/src/compress.h
+++ b/src/compress.h
@@ -1,4 +1,4 @@
-/* $Id: compress.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: compress.h 157225 2015-01-22 18:47:23Z twu $ */
#ifndef COMPRESS_INCLUDED
#define COMPRESS_INCLUDED
#ifdef HAVE_CONFIG_H
diff --git a/src/config.h.in b/src/config.h.in
index fae4677..ac9a8c4 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -72,9 +72,6 @@
/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */
#undef HAVE_FSEEKO
-/* Define to 1 if you have a working Goby library. */
-#undef HAVE_GOBY
-
/* Define to 1 if you have the `index' function. */
#undef HAVE_INDEX
@@ -163,6 +160,27 @@
/* Define to 1 if you have the `rint' function. */
#undef HAVE_RINT
+/* Define to 1 if you have the `semctl' function. */
+#undef HAVE_SEMCTL
+
+/* Define to 1 if you have the `semget' function. */
+#undef HAVE_SEMGET
+
+/* Define to 1 if you have the `semop' function. */
+#undef HAVE_SEMOP
+
+/* Define to 1 if you have the `shmat' function. */
+#undef HAVE_SHMAT
+
+/* Define to 1 if you have the `shmctl' function. */
+#undef HAVE_SHMCTL
+
+/* Define to 1 if you have the `shmdt' function. */
+#undef HAVE_SHMDT
+
+/* Define to 1 if you have the `shmget' function. */
+#undef HAVE_SHMGET
+
/* Define to 1 if you have the `sigaction' function. */
#undef HAVE_SIGACTION
@@ -309,6 +327,9 @@
/* Define this if we can use the "t" mode for fopen safely. */
#undef USE_FOPEN_TEXT
+/* Define to 1 if you want to use MPI_File for input. */
+#undef USE_MPI_FILE_INPUT
+
/* Version number of package */
#undef VERSION
diff --git a/src/datadir.h b/src/datadir.h
index 5e5edfb..78f61c3 100644
--- a/src/datadir.h
+++ b/src/datadir.h
@@ -1,6 +1,7 @@
-/* $Id: datadir.h 40271 2011-05-28 02:29:18Z twu $ */
+/* $Id: datadir.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef DATADIR_INCLUDED
#define DATADIR_INCLUDED
+
#include <stdio.h>
extern char *
diff --git a/src/diag.c b/src/diag.c
index d5a2e62..6c26a2a 100644
--- a/src/diag.c
+++ b/src/diag.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: diag.c 146622 2014-09-02 21:30:22Z twu $";
+static char rcsid[] = "$Id: diag.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -40,26 +40,43 @@ static char rcsid[] = "$Id: diag.c 146622 2014-09-02 21:30:22Z twu $";
#endif
-#ifndef USE_DIAGPOOL
+#if 0
+/* Oligoindex procedures use diagpool_push instead */
T
-Diag_new (Chrpos_T diagonal, int querystart, int queryend, int nconsecutive) {
+Diag_new (int querystart, int queryend, Diag_T diagonal) {
T new = (T) MALLOC(sizeof(*new));
new->diagonal = diagonal;
new->querystart = querystart;
new->queryend = queryend;
- new->nconsecutive = nconsecutive;
+ new->nconsecutive = queryend - querystart + 1;
new->dominatedp = false;
+ new->score = 0.0;
return new;
}
+#endif
+
void
Diag_free (T *old) {
FREE(*old);
return;
}
-#endif
+
+/* Garbage-collects a list of diagonals: calls FREE on every Diag_T
+   stored in *list, then hands the list itself to List_free. */
+void
+Diag_gc (List_T *list) {
+  List_T cell;
+  T diag;
+
+  cell = *list;
+  while (cell != NULL) {
+    diag = (T) List_head(cell);
+    FREE(diag);
+    cell = List_next(cell);
+  }
+
+  /* The elements are gone; now release the list cells */
+  List_free(list);
+  return;
+}
+
Chrpos_T
Diag_diagonal (T this) {
@@ -93,7 +110,54 @@ Diag_set_dominatedp (T this) {
int
-Diag_compare_nconsecutive (const void *x, const void *y) {
+Diag_ascending_cmp (const void *a, const void *b) {
+  /* qsort-style comparator over an array of T.  Orders ascending by
+     querystart, then by queryend, then by diagonal; returns 0 only
+     when all three keys are equal. */
+  T lhs = * (T *) a;
+  T rhs = * (T *) b;
+
+  if (lhs->querystart != rhs->querystart) {
+    return (lhs->querystart < rhs->querystart) ? -1 : +1;
+  }
+  if (lhs->queryend != rhs->queryend) {
+    return (lhs->queryend < rhs->queryend) ? -1 : +1;
+  }
+  if (lhs->diagonal != rhs->diagonal) {
+    return (lhs->diagonal < rhs->diagonal) ? -1 : +1;
+  }
+  return 0;
+}
+
+
+/* qsort-style comparator over an array of T.  Orders descending by
+   querystart, then by queryend, then by diagonal; returns 0 only
+   when all three keys are equal. */
+int
+Diag_descending_cmp (const void *a, const void *b) {
+  T lhs = * (T *) a;
+  T rhs = * (T *) b;
+
+  if (lhs->querystart != rhs->querystart) {
+    return (lhs->querystart > rhs->querystart) ? -1 : +1;
+  }
+  if (lhs->queryend != rhs->queryend) {
+    return (lhs->queryend > rhs->queryend) ? -1 : +1;
+  }
+  if (lhs->diagonal != rhs->diagonal) {
+    return (lhs->diagonal > rhs->diagonal) ? -1 : +1;
+  }
+  return 0;
+}
+
+
+
+static int
+nconsecutive_cmp (const void *x, const void *y) {
T a = * (T *) x;
T b = * (T *) y;
@@ -106,8 +170,8 @@ Diag_compare_nconsecutive (const void *x, const void *y) {
}
}
-int
-Diag_compare_diagonal (const void *x, const void *y) {
+static int
+diagonal_cmp (const void *x, const void *y) {
T a = * (T *) x;
T b = * (T *) y;
@@ -154,6 +218,8 @@ Diag_update_coverage (bool *coveredp, int *ncovered, List_T diagonals, int query
T diag;
int *scores, querypos, count;
+ debug(printf("Entered Diag_update_coverage with %d diagonals\n",List_length(diagonals)));
+
*ncovered = 0;
#ifdef GSNAP
scores = (int *) CALLOCA(querylength,sizeof(int));
@@ -248,7 +314,7 @@ diagonal_coverage (int *clear_coverage, T *array, int nunique) {
int
-Diag_compare_querystart (const void *x, const void *y) {
+_querystart (const void *x, const void *y) {
T a = * (T *) x;
T b = * (T *) y;
@@ -256,10 +322,6 @@ Diag_compare_querystart (const void *x, const void *y) {
return -1;
} else if (b->querystart < a->querystart) {
return +1;
- } else if (a->diagonal < b->diagonal) {
- return -1;
- } else if (b->diagonal < a->diagonal) {
- return +1;
} else {
return 0;
}
@@ -306,7 +368,7 @@ Diag_print_segments (List_T diagonals, char *queryseq_ptr, char *genomicseg_ptr)
if ((n = List_length(diagonals)) > 0) {
array = (T *) MALLOCA(n * sizeof(T));
List_fill_array((void **) array,diagonals);
- qsort(array,List_length(diagonals),sizeof(T),Diag_compare_querystart);
+ qsort(array,List_length(diagonals),sizeof(T),Diag_ascending_cmp);
for (i = 0; i < List_length(diagonals); i++) {
print_segment(array[i],/*chrinit*/0U,queryseq_ptr,genomicseg_ptr);
}
@@ -370,7 +432,7 @@ compute_dominance (int *nunique, T *array, int ndiagonals) {
int i, j, k;
T super, sub;
- qsort(array,ndiagonals,sizeof(T),Diag_compare_nconsecutive);
+ qsort(array,ndiagonals,sizeof(T),nconsecutive_cmp);
*nunique = ndiagonals;
i = 0;
@@ -561,7 +623,7 @@ Diag_compute_bounds (int *diag_querystart, int *diag_queryend,
array = compute_dominance(&nunique,array,ngooddiagonals);
}
- qsort(array,nunique,sizeof(T),Diag_compare_diagonal);
+ qsort(array,nunique,sizeof(T),diagonal_cmp);
if (debug_graphic_p == true) {
print_segments_for_R_array(array,nunique,chrinit,"red");
}
diff --git a/src/diag.h b/src/diag.h
index 9b4dd9b..1e4e639 100644
--- a/src/diag.h
+++ b/src/diag.h
@@ -1,6 +1,7 @@
-/* $Id: diag.h 106198 2013-08-28 23:07:34Z twu $ */
+/* $Id: diag.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef DIAG_INCLUDED
#define DIAG_INCLUDED
+
#include "bool.h"
#include "list.h"
#include "genomicpos.h"
@@ -9,12 +10,13 @@
#define T Diag_T
typedef struct T *T;
-#ifndef USE_DIAGPOOL
extern T
-Diag_new (Chrpos_T diagonal, int querystart, int queryend, int nconsecutive);
+Diag_new (int querystart, int queryend, Univcoord_T univdiagonal);
extern void
Diag_free (T *old);
-#endif
+extern void
+Diag_gc (List_T *list);
+
extern Chrpos_T
Diag_diagonal (T this);
@@ -29,9 +31,9 @@ Diag_dominatedp (T this);
extern void
Diag_set_dominatedp (T this);
extern int
-Diag_compare_nconsecutive (const void *x, const void *y);
+Diag_ascending_cmp (const void *a, const void *b);
extern int
-Diag_compare_diagonal (const void *x, const void *y);
+Diag_descending_cmp (const void *a, const void *b);
extern double
Diag_update_coverage (bool *coveredp, int *ncovered, List_T diagonals, int querylength);
extern int
diff --git a/src/diagdef.h b/src/diagdef.h
index 1f9e3f5..5a1d898 100644
--- a/src/diagdef.h
+++ b/src/diagdef.h
@@ -1,6 +1,7 @@
-/* $Id: diagdef.h 99737 2013-06-27 19:33:03Z twu $ */
+/* $Id: diagdef.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef DIAGDEF_INCLUDED
#define DIAGDEF_INCLUDED
+
#include "bool.h"
#define T Diag_T
diff --git a/src/diagpool.c b/src/diagpool.c
index 21bb0a3..4e7233c 100644
--- a/src/diagpool.c
+++ b/src/diagpool.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: diagpool.c 128119 2014-02-20 22:07:04Z twu $";
+static char rcsid[] = "$Id: diagpool.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -181,6 +181,7 @@ Diagpool_push (List_T list, T this, int diagonal, int querystart, int queryend,
diag->queryend = queryend;
diag->nconsecutive = nconsecutive;
diag->dominatedp = false;
+ diag->score = 0.0;
debug(printf("Creating %p: %d %d..%d\n",diag,diag->diagonal,diag->querystart,diag->queryend));
@@ -216,3 +217,25 @@ Diagpool_pop (List_T list, Diag_T *x) {
}
+/* Prepends an already-existing diag to list, taking the list cell from
+   this pool's listcell storage.  The Diag_T itself is not copied; only
+   the pointer is stored.  Returns the new head of the list. */
+List_T
+Diagpool_push_existing (List_T list, T this, Diag_T diag) {
+  List_T newcell;
+  List_T chunk;
+  int n;
+
+  if (this->listcellctr >= this->nlistcells) {
+    /* All existing listcells are in use, so obtain another chunk */
+    this->listcellptr = add_new_listcellchunk(this);
+
+  } else if ((this->listcellctr % CHUNKSIZE) == 0) {
+    /* At a chunk boundary: walk listcellchunks to the chunk that holds
+       cell number listcellctr.  The countdown from the last chunk's
+       start suggests the chunk list is kept newest-first -- TODO
+       confirm against add_new_listcellchunk */
+    n = this->nlistcells - CHUNKSIZE;
+    chunk = this->listcellchunks;
+    while (n > this->listcellctr) {
+      chunk = chunk->rest;
+      n -= CHUNKSIZE;
+    }
+    this->listcellptr = (struct List_T *) chunk->first;
+    debug1(printf("Located listcell %d at %p\n",this->listcellctr,this->listcellptr));
+  }
+
+  /* Claim the next free cell and advance the pool's cursor and count */
+  newcell = this->listcellptr;
+  this->listcellptr++;
+  this->listcellctr++;
+
+  newcell->first = (void *) diag;
+  newcell->rest = list;
+
+  return newcell;
+}
diff --git a/src/diagpool.h b/src/diagpool.h
index 5e0d2c5..117de0f 100644
--- a/src/diagpool.h
+++ b/src/diagpool.h
@@ -1,6 +1,7 @@
-/* $Id: diagpool.h 40326 2011-05-30 17:27:01Z twu $ */
+/* $Id: diagpool.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef DIAGPOOL_INCLUDED
#define DIAGPOOL_INCLUDED
+
#include "diag.h"
#include "list.h"
@@ -23,6 +24,9 @@ extern List_T
Diagpool_push (List_T list, T this, int diagonal, int querystart, int queryend, int nconsecutive);
extern List_T
Diagpool_pop (List_T list, Diag_T *x);
+extern List_T
+Diagpool_push_existing (List_T list, T this, Diag_T diag);
+
#undef T
#endif
diff --git a/src/doublelist.c b/src/doublelist.c
index c95afa1..d7afff4 100644
--- a/src/doublelist.c
+++ b/src/doublelist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: doublelist.c 155502 2014-12-16 22:22:35Z twu $";
+static char rcsid[] = "$Id: doublelist.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -197,3 +197,14 @@ Doublelist_min (T this) {
return minvalue;
}
+
+/* Prints every value in the list to stdout, one per line, in printf's
+   %f format.  An empty (NULL) list prints nothing. */
+void
+Doublelist_print (T this) {
+  T p;
+
+  for (p = this; p; p = p->rest) {
+    /* Print the cell being visited (p), not the head (this); using
+       this->first would repeat the first value once per cell */
+    printf("%f\n",p->first);
+  }
+  return;
+}
+
diff --git a/src/doublelist.h b/src/doublelist.h
index 752e902..a2f2cfe 100644
--- a/src/doublelist.h
+++ b/src/doublelist.h
@@ -1,4 +1,4 @@
-/* $Id: doublelist.h 154778 2014-12-06 03:32:33Z twu $ */
+/* $Id: doublelist.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef DOUBLELIST_INCLUDED
#define DOUBLELIST_INCLUDED
@@ -25,6 +25,8 @@ extern double
Doublelist_max (T this);
extern double
Doublelist_min (T this);
+extern void
+Doublelist_print (T this);
#undef T
#endif
diff --git a/src/dynprog.h b/src/dynprog.h
index ab4cdae..b4b134c 100644
--- a/src/dynprog.h
+++ b/src/dynprog.h
@@ -1,4 +1,4 @@
-/* $Id: dynprog.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: dynprog.h 157225 2015-01-22 18:47:23Z twu $ */
#ifndef DYNPROG_INCLUDED
#define DYNPROG_INCLUDED
#ifdef HAVE_CONFIG_H
diff --git a/src/except.h b/src/except.h
index af2680b..ef0f206 100644
--- a/src/except.h
+++ b/src/except.h
@@ -1,4 +1,4 @@
-/* $Id: except.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: except.h 157225 2015-01-22 18:47:23Z twu $ */
#ifndef EXCEPT_INCLUDED
#define EXCEPT_INCLUDED
#ifdef HAVE_CONFIG_H
diff --git a/src/filestring.c b/src/filestring.c
new file mode 100644
index 0000000..94a6811
--- /dev/null
+++ b/src/filestring.c
@@ -0,0 +1,490 @@
+static char rcsid[] = "$Id: filestring.c 162093 2015-03-26 18:54:22Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "filestring.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include <ctype.h> /* For isdigit() */
+#include "assert.h"
+#include "mem.h"
+#include "list.h"
+
+
+#define BLOCKSIZE 1024
+
+#ifdef DEBUG
+#define debug(x) x
+#else
+#define debug(x)
+#endif
+
+/* Simultaneous print to stdout */
+#ifdef DEBUG1
+#define debug1(x) x
+#else
+#define debug1(x)
+#endif
+
+
+#define T Filestring_T
+
+struct T {
+ int id; /* value given to Filestring_new; returned by Filestring_id */
+ SAM_split_output_type split_output; /* starts as OUTPUT_NONE */
+
+ List_T blocks; /* BLOCKSIZE-byte chunks; presumably newest first, since readers call List_reverse before walking */
+ int nleft; /* unused bytes remaining in the most recently allocated block */
+ char *ptr; /* next write position inside that block */
+
+ char *string; /* contiguous copy built by Filestring_stringify, otherwise NULL */
+ int strlength; /* length of string without its terminator; -1 when stringified with no blocks */
+};
+
+
+int /* Accessor: returns the id stored by Filestring_new */
+Filestring_id (T this) {
+ return this->id;
+}
+
+void /* Records the split-output category for this object */
+Filestring_set_split_output (T this, int split_output) {
+ this->split_output = split_output; /* int parameter stored into a SAM_split_output_type field */
+ return;
+}
+
+SAM_split_output_type /* Accessor: returns the split-output category (OUTPUT_NONE until set) */
+Filestring_split_output (T this) {
+ return this->split_output;
+}
+
+
+T  /* Allocates an empty Filestring tagged with id; no block is allocated until the first write */
+Filestring_new (int id) {
+  T new = (T) MALLOC_OUT(sizeof(*new));
+
+  new->id = id;
+  new->split_output = OUTPUT_NONE;
+  new->blocks = (List_T) NULL;
+  new->nleft = 0;  /* zero forces the first write to allocate a block */
+  new->ptr = (char *) NULL;
+
+  new->string = (char *) NULL;
+  new->strlength = 0;  /* was left uninitialized until Filestring_stringify ran */
+  return new;
+}
+
+void  /* Releases the stringified copy, every block, the block list, and the object itself */
+Filestring_free (T *old) {
+  List_T cell;
+  char *chunk;
+
+  if (*old == NULL) {
+    return;  /* nothing to release */
+  }
+  if ((*old)->string) {
+    FREE_OUT((*old)->string);
+  }
+  /* Free the storage held by each list cell before freeing the cells */
+  for (cell = (*old)->blocks; cell; cell = List_next(cell)) {
+    chunk = (char *) List_head(cell);
+    FREE_OUT(chunk);
+  }
+  List_free_out(&(*old)->blocks);
+  FREE_OUT(*old);
+
+  return;
+}
+
+
+void /* Builds this->string, a contiguous NUL-terminated copy of all blocks; does nothing if it already exists */
+Filestring_stringify (T this) {
+ List_T p, next;
+ char *ptr, *dest;
+ int nblocks, i;
+
+ if ((nblocks = List_length(this->blocks)) == 0) {
+ this->string = (char *) NULL;
+ this->strlength = -1; /* an object with no blocks is reported as length -1, not 0 */
+
+ } else if (this->string != NULL) {
+ /* Already stringified */
+
+ } else {
+ this->strlength = (nblocks - 1) * BLOCKSIZE + (BLOCKSIZE - this->nleft); /* every block but one counted as full, plus the used part of the block being written */
+ dest = this->string = (char *) MALLOC_OUT((this->strlength + 1) * sizeof(char)); /* +1 for the terminator */
+
+ p = this->blocks = List_reverse(this->blocks); /* NOTE(review): this->blocks stays reversed afterwards; presumably nothing is appended after stringify -- confirm */
+
+ next = List_next(p);
+ while (next != NULL) { /* copy every block except the last in full */
+ ptr = (char *) List_head(p);
+ for (i = 0; i < BLOCKSIZE; i++) {
+ *dest++ = *ptr++;
+ }
+ p = next;
+ next = List_next(p);
+ }
+
+ ptr = (char *) List_head(p); /* last block: copy only the bytes in use */
+ for (i = 0; i < BLOCKSIZE - this->nleft; i++) {
+ *dest++ = *ptr++;
+ }
+
+ *dest = '\0';
+ }
+
+ return;
+}
+
+
+/* Could assume that Filestring_stringify has been called */
+void /* Writes the accumulated contents to fp; does nothing when this is NULL or holds no data */
+Filestring_print (
+#ifdef USE_MPI
+ MPI_File fp,
+#else
+ FILE *fp,
+#endif
+ T this) {
+ List_T p, next;
+ char *ptr;
+
+ if (this == NULL) {
+ return;
+
+#ifdef USE_MPI
+ } else if (fp == NULL) {
+ /* This may not work if worker is from rank 0 */
+ Filestring_send(this,/*dest*/0,/*tag*/MPI_TAG_WRITE_STDOUT,MPI_COMM_WORLD);
+
+#endif
+
+ } else if (this->string != NULL) {
+ /* Already stringified */
+#ifdef USE_MPI
+ debug1(fwrite(this->string,sizeof(char),this->strlength,stdout));
+ MPI_File_write_shared(fp,this->string,this->strlength,MPI_CHAR,MPI_STATUS_IGNORE);
+#else
+ fwrite(this->string,sizeof(char),this->strlength,fp); /* return value is not checked */
+#endif
+
+ } else if (this->blocks == NULL) {
+ return;
+
+ } else {
+ p = this->blocks = List_reverse(this->blocks); /* NOTE(review): reverses this->blocks on every call; printing the same un-stringified object twice would presumably walk it in the opposite order -- confirm callers print once */
+
+ next = List_next(p);
+ while (next != NULL) { /* every block except the last is written in full */
+ ptr = (char *) List_head(p);
+#ifdef USE_MPI
+ debug1(fwrite(ptr,sizeof(char),BLOCKSIZE,stdout));
+ MPI_File_write_shared(fp,ptr,BLOCKSIZE,MPI_CHAR,MPI_STATUS_IGNORE);
+#else
+ fwrite(ptr,sizeof(char),BLOCKSIZE,fp);
+#endif
+ p = next;
+ next = List_next(p);
+ }
+
+ ptr = (char *) List_head(p); /* last block: write only the bytes in use */
+#ifdef USE_MPI
+ debug1(fwrite(ptr,sizeof(char),BLOCKSIZE - this->nleft,stdout));
+ MPI_File_write_shared(fp,ptr,BLOCKSIZE - this->nleft,MPI_CHAR,MPI_STATUS_IGNORE);
+#else
+ fwrite(ptr,sizeof(char),BLOCKSIZE - this->nleft,fp);
+#endif
+ }
+
+ return;
+}
+
+
+static void  /* Appends one character, opening a fresh block when none has room */
+transfer_char (T this, char c) {
+  char *fresh;
+
+  if (!this->nleft) {
+    fresh = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+    this->blocks = List_push_out(this->blocks,(void *) fresh);
+    this->ptr = fresh;
+    this->nleft = BLOCKSIZE;
+  }
+  *(this->ptr) = c;
+  this->ptr += 1;
+  this->nleft--;
+  return;
+}
+
+void  /* Appends string up to its NUL; aborts if no NUL is found within bufferlen bytes */
+transfer_string (T this, char *string, int bufferlen) {
+  char *block, *q;
+
+  for (q = string; --bufferlen >= 0 && *q != '\0'; q++) {
+    if (this->nleft == 0) {
+      block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+      this->blocks = List_push_out(this->blocks,(void *) block);
+      this->nleft = BLOCKSIZE;
+      this->ptr = &(block[0]);
+    }
+    *this->ptr++ = *q;
+    this->nleft -= 1;
+  }
+
+  if (bufferlen < 0) {
+    fprintf(stderr,"Overflowed buffer without seeing a terminating character\n");
+    fprintf(stderr,"String was %.*s\n",(int) (q - string),string);  /* q is one past the buffer here, so print the bytes already scanned, with a bounded length */
+    abort();
+  }
+
+  return;
+}
+
+
+
+#define BUFFERLEN 1024
+
+void  /* Minimal printf work-alike that appends to this; aborts on an unsupported % conversion */
+Filestring_put (T this, const char *format, ...) {
+  va_list values;
+  char BUFFER[BUFFERLEN];
+  char *block;
+  const char *p;
+  char *q, c;
+  int precision;
+
+  va_start(values,format);
+
+  p = format;
+  debug(printf("format is %s\n",format));
+  while (*p != '\0') {
+    if ((c = *p) == '\\') {  /* escape */
+      debug(printf("Saw an escape character\n"));
+      switch (*++p) {
+      case 't': transfer_char(this,'\t'); break; /* Actually \t shows up as an ASCII character */
+      case '\\': transfer_char(this,'\\'); break;
+      default: fprintf(stderr,"Cannot parse \\%c\n",*p);
+      }
+
+    } else if (c == '%') {  /* formatting */
+      debug(printf("After formatting character saw %c\n",p[1]));
+      switch (*++p) {
+      case '%':  /* percent sign */
+        transfer_char(this,'%');
+        break;
+
+      case 'c':  /* character */
+        transfer_char(this,(char) va_arg(values, int));
+        break;
+
+      case 's':  /* string */
+        for (q = va_arg(values, char *); *q != '\0'; q++) {
+          transfer_char(this,*q);
+        }
+        break;
+
+      case '.':  /* float or double */
+        if (*++p == '*') {
+          precision = va_arg(values, int);
+          ++p;
+        } else {
+          sscanf(p,"%d",&precision);
+          while (isdigit((unsigned char) *++p)) ;  /* isdigit requires an unsigned char value */
+        }
+        switch (*p) {
+        case 'f':
+          sprintf(BUFFER,"%.*f",precision,va_arg(values, double));
+          transfer_string(this,BUFFER,BUFFERLEN);
+          break;
+
+        case 'e':
+          sprintf(BUFFER,"%.*e",precision,va_arg(values, double));
+          transfer_string(this,BUFFER,BUFFERLEN);
+          break;
+
+        case 'g':
+          sprintf(BUFFER,"%.*g",precision,va_arg(values, double));
+          transfer_string(this,BUFFER,BUFFERLEN);
+          break;
+
+        case 's':
+          sprintf(BUFFER,"%.*s",precision,va_arg(values, char *));
+          transfer_string(this,BUFFER,BUFFERLEN);
+          break;
+
+        default: fprintf(stderr,"Cannot parse %%.%d%c\n",precision,*p); abort();
+        }
+        break;
+
+      case '*':  /* indirect int or string */
+        precision = va_arg(values, int);
+        debug(printf("format is %c\n",p[1]));
+        switch (*++p) {
+        case 'd':
+          sprintf(BUFFER,"%*d",precision,va_arg(values, int));
+          transfer_string(this,BUFFER,BUFFERLEN);
+          break;
+        case 'u':
+          sprintf(BUFFER,"%*u",precision,va_arg(values, unsigned int));
+          transfer_string(this,BUFFER,BUFFERLEN);
+          break;
+        case 's':
+          sprintf(BUFFER,"%*s",precision,va_arg(values, char *));
+          transfer_string(this,BUFFER,BUFFERLEN);
+          break;
+        default: fprintf(stderr,"Cannot parse %%*%c\n",*p); abort();
+        }
+        break;
+
+      case 'd':  /* int */
+        sprintf(BUFFER,"%d",va_arg(values, int));
+        transfer_string(this,BUFFER,BUFFERLEN);
+        break;
+
+      case 'f':  /* float */
+        sprintf(BUFFER,"%f",va_arg(values, double));
+        transfer_string(this,BUFFER,BUFFERLEN);
+        break;
+
+      case 'u':  /* unsigned int */
+        sprintf(BUFFER,"%u",va_arg(values, unsigned int));
+        transfer_string(this,BUFFER,BUFFERLEN);
+        break;
+
+      case 'l':
+        switch (*++p) {
+        case 'd':  /* long int */
+          sprintf(BUFFER,"%ld",va_arg(values, long int));
+          transfer_string(this,BUFFER,BUFFERLEN);
+          break;
+
+        case 'u':  /* unsigned long */
+          sprintf(BUFFER,"%lu",va_arg(values, unsigned long));
+          transfer_string(this,BUFFER,BUFFERLEN);
+          break;
+
+        case 'l':
+          switch (*++p) {
+          case 'd':  /* long long int */
+            sprintf(BUFFER,"%lld",va_arg(values, long long int));
+            transfer_string(this,BUFFER,BUFFERLEN);
+            break;
+
+          case 'u':  /* unsigned long long */
+            sprintf(BUFFER,"%llu",va_arg(values, unsigned long long));
+            transfer_string(this,BUFFER,BUFFERLEN);  /* was missing, so %llu output was dropped */
+            break;
+
+          default: fprintf(stderr,"Cannot parse %%ll%c\n",*p); abort();
+          }
+          break;
+
+        default: fprintf(stderr,"Cannot parse %%l%c\n",*p); abort();
+        }
+        break;
+
+      default: fprintf(stderr,"Cannot parse %%%c\n",*p); abort();
+      }
+
+    } else {
+      /* transfer_char(this,c); -- effectively inlined here */
+      if (this->nleft == 0) {
+        block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+        this->blocks = List_push_out(this->blocks,(void *) block);
+        this->nleft = BLOCKSIZE;
+        this->ptr = &(block[0]);
+      }
+      *this->ptr++ = c;
+      this->nleft -= 1;
+    }
+
+    p++;
+  }
+
+  va_end(values);
+
+  return;
+}
+
+void  /* Appends a single character; the argument order (char first) mirrors putc */
+Filestring_putc (char c, T this) {
+  char *fresh;
+  if (!this->nleft) {
+    /* No room left (or no block yet): start a new block */
+    fresh = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+    this->blocks = List_push_out(this->blocks,(void *) fresh);
+    this->ptr = fresh;
+    this->nleft = BLOCKSIZE;
+  }
+  *(this->ptr) = c;
+  this->ptr += 1;
+  this->nleft--;
+}
+
+
+/* Appends exactly strlength bytes of string; unlike transfer_string, it does not stop at a NUL */
+void
+Filestring_puts (T this, char *string, int strlength) {
+  char *fresh;
+  int i;
+  for (i = 0; i < strlength; i++) {
+    if (!this->nleft) {
+      fresh = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
+      this->blocks = List_push_out(this->blocks,(void *) fresh);
+      this->ptr = fresh;
+      this->nleft = BLOCKSIZE;
+    }
+    *(this->ptr) = string[i];
+    this->ptr += 1;
+    this->nleft--;
+  }
+  return;
+}
+
+
+
+#ifdef USE_MPI
+char * /* Stringifies this, stores its length in *strlength, and returns the internal string (not a copy) */
+Filestring_extract (int *strlength, T this) {
+ Filestring_stringify(this);
+ if ((*strlength = this->strlength) == 0) { /* NOTE(review): Filestring_stringify reports an empty object as -1 with string NULL, so this == 0 test looks unreachable and callers may see *strlength == -1 -- confirm */
+ return (char *) NULL;
+ } else {
+ return this->string;
+ }
+}
+
+
+void /* Sends the length, then (only if positive) the string with its terminator, to rank dest */
+Filestring_send (T this, int dest, int tag, MPI_Comm comm) {
+ Filestring_stringify(this);
+ MPI_SEND(&this->strlength,1,MPI_INT,dest,tag,comm); /* length goes first; -1 when the object has no blocks */
+ if (this->strlength > 0) {
+ MPI_SEND(this->string,this->strlength+1,MPI_CHAR,dest,tag,comm); /* +1 includes the '\0'; Filestring_recv reads the same count */
+ }
+ return;
+}
+
+
+char * /* Counterpart of Filestring_send: returns a newly allocated string and sets *strlength (0 for an empty message) */
+Filestring_recv (int *strlength, int source, int tag, MPI_Comm comm) {
+ char *string;
+ MPI_Status status;
+
+ MPI_RECV(&(*strlength),1,MPI_INT,source,tag,comm,&status);
+ if (*strlength <= 0) { /* sender had nothing to send; no second message follows */
+ string = (char *) MALLOC(1 * sizeof(char));
+ string[0] = '\0';
+ *strlength = 0; /* normalizes the sender's -1 to 0 */
+ } else {
+ string = (char *) MALLOC(((*strlength) + 1) * sizeof(char));
+ MPI_RECV(string,(*strlength) + 1,MPI_CHAR,source,tag,comm,&status); /* count includes the '\0' sent by Filestring_send */
+ }
+
+ return string; /* allocated with MALLOC here; presumably the caller frees it -- confirm */
+}
+#endif
+
+
+
diff --git a/src/filestring.h b/src/filestring.h
new file mode 100644
index 0000000..fb37b61
--- /dev/null
+++ b/src/filestring.h
@@ -0,0 +1,62 @@
+/* $Id: filestring.h 159426 2015-02-25 00:35:16Z twu $ */
+#ifndef FILESTRING_INCLUDED
+#define FILESTRING_INCLUDED
+
+#ifdef USE_MPI
+#include <mpi.h>
+#include "mpidebug.h"
+#endif
+
+#include <stdio.h>
+#include "samflags.h"
+
+#define FPRINTF Filestring_put
+#define PUTC Filestring_putc
+
+
+#define T Filestring_T
+typedef struct T *T;
+
+extern int
+Filestring_id (T this);
+extern void
+Filestring_set_split_output (T this, int split_output);
+extern SAM_split_output_type
+Filestring_split_output (T this);
+extern T
+Filestring_new (int id);
+extern void
+Filestring_free (T *old);
+extern void
+Filestring_stringify (T this);
+extern void
+Filestring_print (
+#ifdef USE_MPI
+ MPI_File fp,
+#else
+ FILE *fp,
+#endif
+ T this);
+extern char * /* NOTE(review): filestring.c defines no Filestring_get; the MPI-only Filestring_extract has this same signature -- confirm whether this declaration is stale */
+Filestring_get (int *strlength, T this);
+extern void
+Filestring_put (T this, const char *format, ...);
+extern void
+Filestring_putc (char c, T this);
+extern void
+Filestring_puts (T this, char *string, int strlength);
+
+#ifdef USE_MPI
+extern char *
+Filestring_extract (int *strlength, T this);
+extern void
+Filestring_send (T this, int dest, int tag, MPI_Comm comm);
+extern char *
+Filestring_recv (int *strlength, int source, int tag, MPI_Comm comm);
+#endif
+
+
+#undef T
+#endif
+
+
diff --git a/src/fopen.h b/src/fopen.h
index f77e394..6bd0b7e 100644
--- a/src/fopen.h
+++ b/src/fopen.h
@@ -1,7 +1,7 @@
#ifndef FOPEN_INCLUDED
#define FOPEN_INCLUDED
#ifdef HAVE_CONFIG_H
-#include <config.h>
+#include <config.h> /* For USE_FOPEN_BINARY, USE_FOPEN_TEXT */
#endif
diff --git a/src/genome-write.h b/src/genome-write.h
index 8d38abc..d9592b1 100644
--- a/src/genome-write.h
+++ b/src/genome-write.h
@@ -1,6 +1,7 @@
-/* $Id: genome-write.h 132144 2014-04-02 16:02:28Z twu $ */
+/* $Id: genome-write.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef GENOME_WRITE_INCLUDED
#define GENOME_WRITE_INCLUDED
+
#include <stdio.h>
#include "bool.h"
#include "iit-read-univ.h"
diff --git a/src/genome.c b/src/genome.c
index 94746e2..fec13f2 100644
--- a/src/genome.c
+++ b/src/genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: genome.c 161940 2015-03-25 20:36:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -16,6 +16,7 @@ static char rcsid[] = "$Id: genome.c 153955 2014-11-24 17:54:45Z twu $";
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h> /* For munmap */
+
#ifdef HAVE_UNISTD_H
#include <unistd.h> /* For lseek and close */
#endif
@@ -78,6 +79,9 @@ static char rcsid[] = "$Id: genome.c 153955 2014-11-24 17:54:45Z twu $";
#define T Genome_T
struct T {
Access_T access;
+ int chars_shmid;
+ int blocks_shmid;
+
int fd;
size_t len;
@@ -111,8 +115,20 @@ Genome_totallength (T this) {
void
Genome_free (T *old) {
if (*old) {
- if ((*old)->access == ALLOCATED) {
- FREE((*old)->blocks);
+ if ((*old)->access == ALLOCATED_PRIVATE) {
+ if ((*old)->compressedp == true) {
+ FREE((*old)->blocks);
+ } else {
+ FREE((*old)->chars);
+ }
+
+ } else if ((*old)->access == ALLOCATED_SHARED) {
+ if ((*old)->compressedp == true) {
+ Access_deallocate((*old)->blocks,(*old)->blocks_shmid);
+ } else {
+ Access_deallocate((*old)->chars,(*old)->chars_shmid);
+ }
+
#ifdef HAVE_MMAP
} else if ((*old)->access == MMAPPED) {
if ((*old)->compressedp == true) {
@@ -135,9 +151,64 @@ Genome_free (T *old) {
}
+void /* Builds the genome file path for the given options and passes it to Access_shmem_remove */
+Genome_shmem_remove (char *genomesubdir, char *fileroot, char *snps_root, Genometype_T genometype,
+ bool genome_lc_p) {
+ char *filename;
+ bool compressedp = !genome_lc_p; /* the lowercase-preserving genome is the uncompressed ".genome" file */
+
+ if (compressedp == true) {
+ if (genometype == GENOME_OLIGOS) {
+ if (snps_root != NULL) {
+ filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
+ strlen(".genomecomp.")+strlen(snps_root)+1,sizeof(char));
+ sprintf(filename,"%s/%s.genomecomp.%s",genomesubdir,fileroot,snps_root);
+ } else {
+ filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
+ strlen(".genomecomp")+1,sizeof(char));
+ sprintf(filename,"%s/%s.genomecomp",genomesubdir,fileroot);
+ }
+
+ } else if (genometype == GENOME_BITS) {
+ if (snps_root != NULL) {
+ filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
+ strlen(".genomebits128.")+strlen(snps_root)+1,sizeof(char));
+ sprintf(filename,"%s/%s.genomebits128.%s",genomesubdir,fileroot,snps_root);
+ } else {
+ filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
+ strlen(".genomebits128")+1,sizeof(char));
+ sprintf(filename,"%s/%s.genomebits128",genomesubdir,fileroot);
+ }
+ if (Access_file_exists_p(filename) == false) { /* only this GENOME_BITS branch checks that the file exists */
+ fprintf(stderr,"Unable to detect new version of genome index: genomebits128 file not available. This version of GSNAP is not backwards compatible.\n");
+ fprintf(stderr,"Looking specifically for %s\n",filename);
+ FREE(filename);
+ exit(9);
+ return; /* unreachable: follows exit(9) */
+ }
+
+ } else {
+ fprintf(stderr,"Don't recognize genome type %d\n",genometype);
+ abort();
+ }
+
+ } else {
+ filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
+ strlen(".genome")+1,sizeof(char));
+ sprintf(filename,"%s/%s.genome",genomesubdir,fileroot);
+ }
+
+ Access_shmem_remove(filename); /* presumably removes the shared-memory segment keyed by this path -- confirm in access.c */
+ FREE(filename);
+
+ return;
+
+}
+
+
T
Genome_new (char *genomesubdir, char *fileroot, char *snps_root, Genometype_T genometype,
- bool genome_lc_p, Access_mode_T access) {
+ bool genome_lc_p, Access_mode_T access, bool sharedp) {
T new = (T) MALLOC(sizeof(*new));
char *filename;
bool compressedp = !genome_lc_p;
@@ -199,7 +270,7 @@ Genome_new (char *genomesubdir, char *fileroot, char *snps_root, Genometype_T ge
} else if (genometype == GENOME_BITS) {
fprintf(stderr,"(bits)...");
}
- new->blocks = (Genomecomp_T *) Access_allocated(&new->len,&seconds,filename,sizeof(Genomecomp_T));
+ new->blocks = (Genomecomp_T *) Access_allocate(&new->blocks_shmid,&new->len,&seconds,filename,sizeof(Genomecomp_T),sharedp);
if (new->blocks == NULL) {
fprintf(stderr,"insufficient memory (need to use a lower batch mode (-B))\n");
exit(9);
@@ -207,7 +278,11 @@ Genome_new (char *genomesubdir, char *fileroot, char *snps_root, Genometype_T ge
comma = Genomicpos_commafmt(new->len);
fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma,seconds);
FREE(comma);
- new->access = ALLOCATED;
+ if (sharedp == true) {
+ new->access = ALLOCATED_SHARED;
+ } else {
+ new->access = ALLOCATED_PRIVATE;
+ }
}
#ifdef HAVE_MMAP
@@ -250,7 +325,7 @@ Genome_new (char *genomesubdir, char *fileroot, char *snps_root, Genometype_T ge
if (access == USE_ALLOCATE) {
fprintf(stderr,"Allocating memory for uncompressed genome...");
- new->chars = (char *) Access_allocated(&new->len,&seconds,filename,sizeof(char));
+ new->chars = (char *) Access_allocate(&new->chars_shmid,&new->len,&seconds,filename,sizeof(char),sharedp);
if (new->chars == NULL) {
fprintf(stderr,"insufficient memory (need to use a lower batch mode (-B))\n");
exit(9);
@@ -258,7 +333,11 @@ Genome_new (char *genomesubdir, char *fileroot, char *snps_root, Genometype_T ge
comma = Genomicpos_commafmt(new->len);
fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma,seconds);
FREE(comma);
- new->access = ALLOCATED;
+ if (sharedp == true) {
+ new->access = ALLOCATED_SHARED;
+ } else {
+ new->access = ALLOCATED_PRIVATE;
+ }
}
#ifdef HAVE_MMAP
@@ -8958,8 +9037,8 @@ Genome_uncompress_mmap (char *gbuffer1, Genomecomp_T *blocks, Univcoord_T startp
Univcoord_T startblock, endblock, ptr;
Genomecomp_T high, low, flags;
char Buffer[32];
- int startdiscard, enddiscard, i;
- Univcoord_T k = 0;
+ int startdiscard, enddiscard;
+ Univcoord_T k = 0, i;
/* sequence = (char *) CALLOC(length+1,sizeof(char)); */
@@ -10301,8 +10380,8 @@ static Genomecomp_T *genomealt_blocks; /* Can be equal to genome_blocks, but not
static Mode_T mode;
static int circular_typeint = -1;
-static unsigned char *fwd_conversion;
-static unsigned char *rev_conversion;
+static char *fwd_conversion;
+static char *rev_conversion;
void
@@ -10475,7 +10554,7 @@ Genome_fill_buffer (Chrnum_T *chrnum, int *nunknowns, T this, Univcoord_T left,
void
-Genome_fill_buffer_simple (T this, Univcoord_T left, Chrpos_T length, unsigned char *gbuffer1) {
+Genome_fill_buffer_simple (T this, Univcoord_T left, Chrpos_T length, char *gbuffer1) {
int delta, i;
#if 0
diff --git a/src/genome.h b/src/genome.h
index bb2d012..41c7430 100644
--- a/src/genome.h
+++ b/src/genome.h
@@ -1,4 +1,4 @@
-/* $Id: genome.h 145990 2014-08-25 21:47:32Z twu $ */
+/* $Id: genome.h 161940 2015-03-25 20:36:59Z twu $ */
#ifndef GENOME_INCLUDED
#define GENOME_INCLUDED
@@ -11,7 +11,6 @@
#include "chrnum.h"
#include "mode.h"
-
#define OUTOFBOUNDS '*'
typedef enum {GENOME_OLIGOS, GENOME_BITS} Genometype_T;
@@ -25,9 +24,12 @@ extern Genomecomp_T *
Genome_blocks (T this);
extern Univcoord_T
Genome_totallength (T this);
+extern void
+Genome_shmem_remove (char *genomesubdir, char *fileroot, char *snps_root, Genometype_T genometype,
+ bool genome_lc_p);
extern T
Genome_new (char *genomesubdir, char *fileroot, char *snps_root,
- Genometype_T genometype, bool genome_lc_p, Access_mode_T access);
+ Genometype_T genometype, bool genome_lc_p, Access_mode_T access, bool sharedp);
extern void
Genome_setup (T genome_in, T genomealt_in, Mode_T mode_in, int circular_typeint_in);
@@ -41,7 +43,7 @@ extern bool
Genome_fill_buffer (Chrnum_T *chrnum, int *nunknowns, T this, Univcoord_T left, Chrpos_T length, char *gbuffer1,
Univ_IIT_T chromosome_iit);
extern void
-Genome_fill_buffer_simple (T this, Univcoord_T left, Chrpos_T length, unsigned char *gbuffer1);
+Genome_fill_buffer_simple (T this, Univcoord_T left, Chrpos_T length, char *gbuffer1);
extern void
Genome_fill_buffer_convert_fwd (Univcoord_T left, Chrpos_T length, char *gbuffer1);
extern void
diff --git a/src/genome128_hr.c b/src/genome128_hr.c
index d77f6e0..f79c657 100644
--- a/src/genome128_hr.c
+++ b/src/genome128_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome128_hr.c 160005 2015-03-03 02:08:47Z twu $";
+static char rcsid[] = "$Id: genome128_hr.c 166739 2015-06-02 01:23:18Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -81,6 +81,13 @@ static char rcsid[] = "$Id: genome128_hr.c 160005 2015-03-03 02:08:47Z twu $";
#define debug4(x)
#endif
+/* mark mismatches */
+#ifdef DEBUG5
+#define debug5(x) x
+#else
+#define debug5(x)
+#endif
+
/* 32-bit shortcuts */
#ifdef DEBUG14
#define debug14(x) x
@@ -16485,7 +16492,7 @@ static const int score_high[] =
/* Genome_hr code starts here */
-#ifdef DEBUG
+#if defined(DEBUG) || defined(DEBUG5)
#ifdef HAVE_SSE4_1
static void
print_vector_hex (__m128i x) {
@@ -16546,7 +16553,7 @@ reduce_nt_unshuffle (UINT4 xhigh, UINT4 xlow) {
#endif
-#if defined(DEBUG) || defined(DEBUG2)
+#if defined(DEBUG) || defined(DEBUG2) || defined(DEBUG5)
static void
write_chars (Genomecomp_T high, Genomecomp_T low, Genomecomp_T flags) {
char Buffer[33];
@@ -16583,7 +16590,7 @@ write_chars (Genomecomp_T high, Genomecomp_T low, Genomecomp_T flags) {
-#if defined(DEBUG) || defined(DEBUG2)
+#if defined(DEBUG) || defined(DEBUG2) || defined(DEBUG5)
static void
Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T endpos) {
/* Chrpos_T length = endpos - startpos; */
@@ -16754,7 +16761,7 @@ Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T end
}
#endif
-#ifdef DEBUG
+#if defined(DEBUG) || defined(DEBUG5)
static void
Genome_print_blocks_snp (Genomecomp_T *blocks, Genomecomp_T *snp_blocks, Univcoord_T startpos, Univcoord_T endpos) {
/* Chrpos_T length = endpos - startpos; */
@@ -18592,7 +18599,7 @@ set_end (__m128i _diff, int enddiscard) {
return _mm_or_si128(_mask, _diff);
}
-#ifdef DEBUG
+#if defined(DEBUG) || defined(DEBUG5)
static void
print_diff_popcount (__m128i _diff) {
printf("diff: ");
@@ -20372,6 +20379,7 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
debug(
printf("\n\n");
+ printf("Entered mismatches_left with %d max_mismatches\n",max_mismatches);
printf("Genome (in mismatches_left):\n");
Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
printf("\n");
@@ -20810,6 +20818,7 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
debug(
printf("\n\n");
+ printf("Entered mismatches_right with %d max_mismatches\n",max_mismatches);
printf("Genome (in mismatches_right):\n");
Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
printf("\n");
@@ -21225,7 +21234,7 @@ Genome_mismatches_right_trim (int *mismatch_positions, int max_mismatches, Compr
/* Derived from mismatches_left() */
int
Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_compress,
- Univcoord_T left, int pos5, int pos3, int mismatch_offset,
+ Univcoord_T left, int pos5, int pos3,
bool plusp, int genestrand, bool first_read_p) {
#ifdef DEBUG14
int answer;
@@ -21241,7 +21250,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
int startcolumni, endcolumni;
- debug(
+ debug5(
printf("\n\n");
printf("genomic = %s\n",genomic);
printf("Genome (in mark_mismatches_ref):\n");
@@ -21258,24 +21267,28 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
endcolumni = ((left+pos3) % 128) / 32;
endblocki_32 = endblocki + endcolumni;
- debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
- left,pos5,pos3,startblocki,endblocki));
+ debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u, plusp %d\n",
+ left,pos5,pos3,startblocki,endblocki,plusp));
nshift = left % STEP_SIZE;
query_shifted = Compress_shift(query_compress,nshift);
- debug(printf("Query shifted %d:\n",nshift));
- debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
+ debug5(printf("Query shifted %d:\n",nshift));
+ debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
if (endblocki_32 == startblocki_32) {
startdiscard = (left+pos5) % 32;
enddiscard = (left+pos3) % 32;
+#if 0
if (plusp == true) {
- offset = -startdiscard + pos5 + mismatch_offset;
+ offset = -startdiscard + pos5 /*+ mismatch_offset*/;
} else {
- offset = -startdiscard + pos5 - mismatch_offset;
+ offset = -startdiscard + pos5 /*- mismatch_offset*/;
}
- debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+#else
+ offset = -startdiscard + pos5;
+#endif
+ debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
diff_32 = (block_diff_32)(query_shifted
@@ -21296,7 +21309,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
genomic[mismatch_position] = tolower(genomic[mismatch_position]);
nmismatches++;
}
- debug(printf("genomic = %s\n",genomic));
+ debug5(printf("genomic = %s\n",genomic));
#ifdef DEBUG14
answer = nmismatches;
nmismatches = 0;
@@ -21317,13 +21330,17 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
startdiscard = (left+pos5) % STEP_SIZE;
enddiscard = (left+pos3) % STEP_SIZE;
+#if 0
if (plusp == true) {
- offset = -startdiscard + pos5 + mismatch_offset;
+ offset = -startdiscard + pos5 /*+ mismatch_offset*/;
} else {
- offset = -startdiscard + pos5 - mismatch_offset;
+ offset = -startdiscard + pos5 /*- mismatch_offset*/;
}
+#else
+ offset = -startdiscard + pos5;
+#endif
- debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+ debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
#ifndef DEBUG14
}
@@ -21339,7 +21356,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
- debug(print_diff_trailing_zeroes(diff,offset));
+ debug5(print_diff_trailing_zeroes(diff,offset));
diff = clear_lowbit(diff,relpos);
if (plusp == false) {
mismatch_position = (querylength - 1) - mismatch_position;
@@ -21347,7 +21364,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
genomic[mismatch_position] = tolower(genomic[mismatch_position]);
nmismatches++;
}
- debug(printf("genomic = %s\n",genomic));
+ debug5(printf("genomic = %s\n",genomic));
debug14(if (endblocki_32 == startblocki) assert(answer == nmismatches));
return nmismatches;
@@ -21361,7 +21378,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
- debug(print_diff_trailing_zeroes(diff,offset));
+ debug5(print_diff_trailing_zeroes(diff,offset));
diff = clear_lowbit(diff,relpos);
if (plusp == false) {
mismatch_position = (querylength - 1) - mismatch_position;
@@ -21384,7 +21401,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
- debug(print_diff_trailing_zeroes(diff,offset));
+ debug5(print_diff_trailing_zeroes(diff,offset));
diff = clear_lowbit(diff,relpos);
if (plusp == false) {
mismatch_position = (querylength - 1) - mismatch_position;
@@ -21408,7 +21425,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
- debug(print_diff_trailing_zeroes(diff,offset));
+ debug5(print_diff_trailing_zeroes(diff,offset));
diff = clear_lowbit(diff,relpos);
if (plusp == false) {
mismatch_position = (querylength - 1) - mismatch_position;
@@ -21416,7 +21433,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
genomic[mismatch_position] = tolower(genomic[mismatch_position]);
nmismatches++;
}
- debug(printf("genomic = %s\n",genomic));
+ debug5(printf("genomic = %s\n",genomic));
debug14(if (endblocki_32 == startblocki) assert(answer == nmismatches));
return nmismatches;
@@ -21428,7 +21445,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
/* Derived from mismatches_left_snps() */
static int
mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
- Univcoord_T left, int pos5, int pos3, int mismatch_offset,
+ Univcoord_T left, int pos5, int pos3,
bool plusp, int genestrand, bool first_read_p) {
#ifdef DEBUG14
int answer;
@@ -21444,7 +21461,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
int startcolumni, endcolumni;
- debug(
+ debug5(
printf("\n\n");
printf("genomic = %s\n",genomic);
printf("Genome (in mark_mismatches_snps):\n");
@@ -21461,24 +21478,28 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
endcolumni = ((left+pos3) % 128) / 32;
endblocki_32 = endblocki + endcolumni;
- debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
+ debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
left,pos5,pos3,startblocki,endblocki));
nshift = left % STEP_SIZE;
query_shifted = Compress_shift(query_compress,nshift);
- debug(printf("Query shifted %d:\n",nshift));
- debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
+ debug5(printf("Query shifted %d:\n",nshift));
+ debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
if (endblocki_32 == startblocki_32) {
startdiscard = (left+pos5) % 32;
enddiscard = (left+pos3) % 32;
+#if 0
if (plusp == true) {
- offset = -startdiscard + pos5 + mismatch_offset;
+ offset = -startdiscard + pos5 /*+ mismatch_offset*/;
} else {
- offset = -startdiscard + pos5 - mismatch_offset;
+ offset = -startdiscard + pos5 /*- mismatch_offset*/;
}
- debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+#else
+ offset = -startdiscard + pos5;
+#endif
+ debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
diff_32 = (block_diff_snp_32)(query_shifted
@@ -21499,7 +21520,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
genomic[mismatch_position] = tolower(genomic[mismatch_position]);
nmismatches_both++;
}
- debug(printf("genomic = %s\n",genomic));
+ debug5(printf("genomic = %s\n",genomic));
#ifdef DEBUG14
answer = nmismatches_both;
nmismatches_both = 0;
@@ -21520,13 +21541,17 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
startdiscard = (left+pos5) % STEP_SIZE;
enddiscard = (left+pos3) % STEP_SIZE;
+#if 0
if (plusp == true) {
- offset = -startdiscard + pos5 + mismatch_offset;
+ offset = -startdiscard + pos5 /*+ mismatch_offset*/;
} else {
- offset = -startdiscard + pos5 - mismatch_offset;
+ offset = -startdiscard + pos5 /*- mismatch_offset*/;
}
+#else
+ offset = -startdiscard + pos5;
+#endif
- debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+ debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
#ifndef DEBUG14
}
@@ -21542,7 +21567,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
- debug(print_diff_trailing_zeroes(diff,offset));
+ debug5(print_diff_trailing_zeroes(diff,offset));
diff = clear_lowbit(diff,relpos);
if (plusp == false) {
mismatch_position = (querylength - 1) - mismatch_position;
@@ -21550,7 +21575,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
genomic[mismatch_position] = tolower(genomic[mismatch_position]);
nmismatches_both++;
}
- debug(printf("genomic = %s\n",genomic));
+ debug5(printf("genomic = %s\n",genomic));
debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
return nmismatches_both;
@@ -21564,7 +21589,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
- debug(print_diff_trailing_zeroes(diff,offset));
+ debug5(print_diff_trailing_zeroes(diff,offset));
diff = clear_lowbit(diff,relpos);
if (plusp == false) {
mismatch_position = (querylength - 1) - mismatch_position;
@@ -21589,7 +21614,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
- debug(print_diff_trailing_zeroes(diff,offset));
+ debug5(print_diff_trailing_zeroes(diff,offset));
diff = clear_lowbit(diff,relpos);
if (plusp == false) {
mismatch_position = (querylength - 1) - mismatch_position;
@@ -21613,7 +21638,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
while (nonzero_p(diff)) {
mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
- debug(print_diff_trailing_zeroes(diff,offset));
+ debug5(print_diff_trailing_zeroes(diff,offset));
diff = clear_lowbit(diff,relpos);
if (plusp == false) {
mismatch_position = (querylength - 1) - mismatch_position;
@@ -21621,7 +21646,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
genomic[mismatch_position] = tolower(genomic[mismatch_position]);
nmismatches_both++;
}
- debug(printf("genomic = %s\n",genomic));
+ debug5(printf("genomic = %s\n",genomic));
debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
return nmismatches_both;
@@ -21633,14 +21658,14 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
int
Genome_mark_mismatches (char *genomic, int querylength, Compress_T query_compress,
- Univcoord_T left, int pos5, int pos3, int mismatch_offset,
+ Univcoord_T left, int pos5, int pos3,
bool plusp, int genestrand, bool first_read_p) {
#if 0
if (dibasep) {
fprintf(stderr,"Not implemented\n");
#if 0
- debug(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3));
+ debug5(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3));
nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
@@ -21652,10 +21677,10 @@ Genome_mark_mismatches (char *genomic, int querylength, Compress_T query_compres
if (snp_blocks == NULL) {
return Genome_mark_mismatches_ref(&(*genomic),querylength,query_compress,
- left,pos5,pos3,mismatch_offset,plusp,genestrand,first_read_p);
+ left,pos5,pos3,plusp,genestrand,first_read_p);
} else {
return mark_mismatches_snps(&(*genomic),querylength,query_compress,
- left,pos5,pos3,mismatch_offset,plusp,genestrand,first_read_p);
+ left,pos5,pos3,plusp,genestrand,first_read_p);
}
}
diff --git a/src/genome128_hr.h b/src/genome128_hr.h
index f5bbadd..c3866cb 100644
--- a/src/genome128_hr.h
+++ b/src/genome128_hr.h
@@ -1,4 +1,4 @@
-/* $Id: genome128_hr.h 133760 2014-04-20 05:16:56Z twu $ */
+/* $Id: genome128_hr.h 166739 2015-06-02 01:23:18Z twu $ */
#ifndef GENOME128_HR_INCLUDED
#define GENOME128_HR_INCLUDED
#include "types.h"
@@ -57,11 +57,11 @@ Genome_mismatches_right_trim (int *mismatch_positions, int max_mismatches, Compr
extern int
Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_compress,
- Univcoord_T left, int pos5, int pos3, int mismatch_offset,
+ Univcoord_T left, int pos5, int pos3,
bool plusp, int genestrand, bool first_read_p);
extern int
Genome_mark_mismatches (char *genomic, int querylength, Compress_T query_compress,
- Univcoord_T left, int pos5, int pos3, int mismatch_offset,
+ Univcoord_T left, int pos5, int pos3,
bool plusp, int genestrand, bool first_read_p);
extern int
diff --git a/src/genome_sites.h b/src/genome_sites.h
index 21ee23e..49cd010 100644
--- a/src/genome_sites.h
+++ b/src/genome_sites.h
@@ -1,6 +1,7 @@
-/* $Id: genome_sites.h 106198 2013-08-28 23:07:34Z twu $ */
+/* $Id: genome_sites.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef GENOME_SITES_INCLUDED
#define GENOME_SITES_INCLUDED
+
#include "bool.h"
#include "types.h"
#include "genomicpos.h"
diff --git a/src/genomicpos.c b/src/genomicpos.c
index 92f3742..64db6b0 100644
--- a/src/genomicpos.c
+++ b/src/genomicpos.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genomicpos.c 101488 2013-07-15 16:52:36Z twu $";
+static char rcsid[] = "$Id: genomicpos.c 155282 2014-12-12 19:42:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -59,6 +59,47 @@ Genomicpos_commafmt (
}
+#ifdef MEMUSAGE
+/* Formats N in decimal with a comma between each group of three digits
+   (e.g., 1234567 -> "1,234,567") and copies the result, including the
+   terminal '\0', into the caller-supplied string.  The caller must
+   provide enough room for the formatted number.
+
+   Does not allocate memory: the digits are assembled right-to-left in
+   an automatic (stack) buffer rather than one obtained from CALLOC, so
+   calling this while reporting MEMUSAGE results does not itself change
+   the memory usage being reported. */
+void
+Genomicpos_commafmt_fill (char *string,
+#ifdef HAVE_64_BIT
+                          UINT8 N
+#else
+                          UINT4 N
+#endif
+                          ) {
+  char buffer[BUFSIZE+1];       /* Scratch space; every byte read below is written first */
+  int len, posn = 1;
+  char *ptr, *start;
+
+  /* Build the string backwards, starting from the terminal '\0' */
+  start = ptr = &(buffer[BUFSIZE]);
+  buffer[BUFSIZE] = '\0';
+
+  if (N == 0UL) {
+    *--ptr = '0';
+  } else {
+    while (N > 0UL) {
+      *--ptr = (char)((N % 10UL) + '0');
+      N /= 10UL;
+      if (N > 0UL) {
+        /* More digits remain: insert a separator after every third digit */
+        if ((posn % 3) == 0) {
+          *--ptr = ',';
+        }
+      }
+      posn++;
+    }
+  }
+
+  len = start - ptr;            /* Not including terminal '\0'. */
+  memcpy(string,ptr,len+1);     /* The +1 copies the terminal '\0' as well */
+  return;
+}
+#endif
+
+
int
UINT8_compare (const void *a, const void *b) {
UINT8 x = * (UINT8 *) a;
diff --git a/src/genomicpos.h b/src/genomicpos.h
index 74c4ca7..dc5b739 100644
--- a/src/genomicpos.h
+++ b/src/genomicpos.h
@@ -1,4 +1,4 @@
-/* $Id: genomicpos.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: genomicpos.h 157225 2015-01-22 18:47:23Z twu $ */
#ifndef GENOMICPOS_INCLUDED
#define GENOMICPOS_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -38,6 +38,17 @@ Genomicpos_commafmt (
UINT4 N
#endif
);
+#ifdef MEMUSAGE
+/* Writes N, formatted in decimal with a comma between each group of
+   three digits, into the caller-supplied string, including the
+   terminal '\0'.  Declared only when MEMUSAGE is defined. */
+void
+Genomicpos_commafmt_fill (char *string,
+#ifdef HAVE_64_BIT
+ UINT8 N
+#else
+ UINT4 N
+#endif
+ );
+#endif
+
extern int
UINT8_compare (const void *a, const void *b);
extern int
diff --git a/src/get-genome.c b/src/get-genome.c
index 679f285..e2e6e7a 100644
--- a/src/get-genome.c
+++ b/src/get-genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: get-genome.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: get-genome.c 161940 2015-03-25 20:36:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -479,9 +479,9 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
genomicseg = Genome_get_segment(genome,genomicstart,genomiclength,chromosome_iit,revcomp);
if (user_typestring == NULL) {
if (rawp == true) {
- Sequence_print_raw(genomicseg);
+ Sequence_stdout_raw(genomicseg);
} else {
- Sequence_print(stdout,genomicseg,uppercasep,wraplength,/*trimmedp*/false);
+ Sequence_stdout(genomicseg,uppercasep,wraplength,/*trimmedp*/false);
}
Sequence_free(&genomicseg);
}
@@ -493,9 +493,9 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
genomicseg_snp = Genome_get_segment_snp(genomealt,genomicstart,genomiclength,chromosome_iit,revcomp);
if (user_typestring == NULL) {
if (rawp == true) {
- Sequence_print_raw(genomicseg);
+ Sequence_stdout_raw(genomicseg);
} else {
- Sequence_print_alt(genomicseg,genomicseg_alt,genomicseg_snp,uppercasep,wraplength);
+ Sequence_stdout_alt(genomicseg,genomicseg_alt,genomicseg_snp,uppercasep,wraplength);
}
Sequence_free(&genomicseg_snp);
Sequence_free(&genomicseg_alt);
@@ -508,9 +508,9 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
genomicseg_snp = Genome_get_segment_snp(genomealt,genomicstart,genomiclength,chromosome_iit,revcomp);
if (user_typestring == NULL) {
if (rawp == true) {
- Sequence_print_raw(genomicseg);
+ Sequence_stdout_raw(genomicseg);
} else {
- Sequence_print_two(genomicseg,genomicseg_snp,uppercasep,wraplength);
+ Sequence_stdout_two(genomicseg,genomicseg_snp,uppercasep,wraplength);
}
Sequence_free(&genomicseg_snp);
Sequence_free(&genomicseg);
@@ -554,9 +554,9 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
if (nindices == 0) {
/* Print reference strain */
if (rawp == true) {
- Sequence_print_raw(genomicseg);
+ Sequence_stdout_raw(genomicseg);
} else {
- Sequence_print(stdout,genomicseg,uppercasep,wraplength,/*trimmedp*/false);
+ Sequence_stdout(genomicseg,uppercasep,wraplength,/*trimmedp*/false);
}
Sequence_free(&genomicseg);
}
@@ -601,9 +601,9 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
dbversion,genomicstart+1,SEPARATOR,genomicstart+genomiclength,strain);
}
if (rawp == true) {
- Sequence_print_raw(genomicseg);
+ Sequence_stdout_raw(genomicseg);
} else {
- Sequence_print(stdout,genomicseg,uppercasep,wraplength,/*trimmedp*/false);
+ Sequence_stdout(genomicseg,uppercasep,wraplength,/*trimmedp*/false);
}
Sequence_free(&genomicseg);
FREE(gbuffer3);
@@ -1166,15 +1166,15 @@ main (int argc, char *argv[]) {
if (snps_root == NULL || print_snps_mode == 0) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
} else if (print_snps_mode == 2) {
genome = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
} else if (print_snps_mode == 1 || print_snps_mode == 3) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
genomealt = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
}
for (indx = 1; indx <= Univ_IIT_total_nintervals(chromosome_iit); indx++) {
@@ -1264,15 +1264,15 @@ main (int argc, char *argv[]) {
if (snps_root == NULL || print_snps_mode == 0) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
} else if (print_snps_mode == 2) {
genome = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
} else if (print_snps_mode == 1 || print_snps_mode == 3) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
genomealt = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
}
if (Parserange_universal(&segment,&revcomp,&genomicstart,&genomiclength,&chrstart,&chrend,
@@ -1305,16 +1305,16 @@ main (int argc, char *argv[]) {
if (exonsp == true || sequencep == true) {
if (snps_root == NULL || print_snps_mode == 0) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
} else if (print_snps_mode == 2) {
genome = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
} else if (print_snps_mode == 1 || print_snps_mode == 3) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
#if 0
genomealt = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
#endif
}
}
@@ -1462,15 +1462,15 @@ main (int argc, char *argv[]) {
if (snps_root == NULL || print_snps_mode == 0) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
} else if (print_snps_mode == 2) {
genome = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
} else if (print_snps_mode == 1 || print_snps_mode == 3) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
genomealt = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,/*access*/USE_MMAP_ONLY);
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
}
iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
diff --git a/src/gmap.c b/src/gmap.c
index d668a1c..60f8f29 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,8 +1,13 @@
-static char rcsid[] = "$Id: gmap.c 158355 2015-02-10 19:08:45Z twu $";
+static char rcsid[] = "$Id: gmap.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
+#ifdef USE_MPI
+#include <mpi.h>
+#include "mpidebug.h"
+#endif
+
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h> /* Needed to define pthread_t on Solaris */
#endif
@@ -67,6 +72,7 @@ static char rcsid[] = "$Id: gmap.c 158355 2015-02-10 19:08:45Z twu $";
#include "dynprog_single.h"
#include "dynprog_genome.h"
#include "dynprog_end.h"
+#include "pair.h"
#include "stage3.h"
#include "comp.h"
#include "chimera.h"
@@ -85,8 +91,12 @@ static char rcsid[] = "$Id: gmap.c 158355 2015-02-10 19:08:45Z twu $";
#include "iit-read-univ.h"
#include "iit-read.h"
#include "datadir.h"
+
+#include "filestring.h"
+#include "output.h"
#include "inbuffer.h"
#include "outbuffer.h"
+
#include "getopt.h"
@@ -115,6 +125,14 @@ static char rcsid[] = "$Id: gmap.c 158355 2015-02-10 19:08:45Z twu $";
/* #define EXTRACT_GENOMICSEG 1 */
+/* MPI Processing */
+#ifdef DEBUGM
+#define debugm(x) x
+#else
+#define debugm(x)
+#endif
+
+
#ifdef DEBUG
#define debug(x) x
#else
@@ -151,8 +169,9 @@ static char rcsid[] = "$Id: gmap.c 158355 2015-02-10 19:08:45Z twu $";
static Univ_IIT_T chromosome_iit = NULL;
static Univcoord_T genomelength;
static int circular_typeint = -1;
-static int nchrs;
+static int nchromosomes;
static bool *circularp = NULL;
+static bool any_circular_p;
static Univ_IIT_T contig_iit = NULL;
static Genome_T genomecomp = NULL;
static Genome_T genomecomp_alt = NULL;
@@ -239,7 +258,6 @@ static int extraband_paired = 14; /* This is in addition to length2 - length1 */
static int minendexon = 9;
static Stopwatch_T stopwatch = NULL;
-static int nextchar = '\0';
/************************************************************************
@@ -254,12 +272,14 @@ static char *user_genomicseg = NULL;
static bool user_selfalign_p = false;
static bool user_pairalign_p = false;
static char *user_cmdline = NULL;
-static Sequence_T usersegment = NULL;
+static Sequence_T global_usersegment = NULL;
static int part_modulus = 0;
static int part_interval = 1;
/* Compute options */
static int min_matches;
+
+static bool sharedp = true;
static Access_mode_T offsetsstrm_access = USE_ALLOCATE;
static bool expand_offsets_p = false;
@@ -274,15 +294,9 @@ static Access_mode_T genome_access = USE_ALLOCATE;
static int min_intronlength = 9;
static int max_deletionlength = 50;
static int maxtotallen_bound = 2400000;
-static int maxintronlen_bound = 200000; /* Was used previously in stage 1. Now used only in stage 2 and Stage3_mergeable. */
+static int maxintronlen = 200000; /* Was used previously in stage 1. Now used only in stage 2 and Stage3_mergeable. */
static int maxextension = 1000000; /* Used in stage 1. Not adjustable by user */
static int chimera_margin = 30; /* Useful for finding readthroughs */
-static bool maponlyp = false;
-#ifdef PMAP
-static bool userstage1p = false; /* Apply stage 1 for user-provided genomic segments. Must be false. */
-#else
-static bool userstage1p = false; /* Apply stage 1 for user-provided genomic segments */
-#endif
static int index1interval = 3; /* Stage 1 interval if user provides a genomic segment */
static char *referencefile = NULL;
@@ -292,9 +306,15 @@ static bool literalrefp = false;
#endif
#endif
+#ifdef USE_MPI
+static int nprocs, n_worker_procs, proci, myid;
+#endif
+
+
static bool altstrainp = false;
#ifdef HAVE_PTHREAD
static pthread_t output_thread_id, *worker_thread_ids;
+static pthread_key_t global_request_key;
static int nworkers = 1; /* (int) sysconf(_SC_NPROCESSORS_ONLN) */
#else
static int nworkers = 0; /* (int) sysconf(_SC_NPROCESSORS_ONLN) */
@@ -328,9 +348,9 @@ static bool debug_graphic_p = false;
static bool stage1debug = false;
static bool diag_debug = false;
static Stage3debug_T stage3debug = NO_STAGE3DEBUG;
-static bool diagnosticp = false;
+static bool timingp = false;
static bool checkp = false;
-static int maxpaths = 5; /* 0 means 1 if nonchimeric, 2 if chimeric */
+static int maxpaths_report = 5; /* 0 means 1 if nonchimeric, 2 if chimeric */
static bool quiet_if_excessive_p = false;
static int suboptimal_score = 1000000;
static bool require_splicedir_p = false;
@@ -359,7 +379,6 @@ static bool checksump = false;
static int chimera_overlap = 0;
static bool force_xs_direction_p = false;
static bool md_lowercase_variant_p = false;
-static Cigar_action_T cigar_action = CIGAR_ACTION_WARNING;
/* Map file options */
static char *user_mapdir = NULL;
@@ -422,13 +441,12 @@ static Triecontent_T *triecontents_max = NULL;
/* Input/output */
-static char *sevenway_root = NULL;
+static char *split_output_root = NULL;
static char *failedinput_root = NULL;
static bool appendp = false;
static Inbuffer_T inbuffer = NULL;
static Outbuffer_T outbuffer = NULL;
static unsigned int inbuffer_nspaces = 1000;
-static unsigned int inbuffer_maxchars = -1U; /* Currently not used by Inbuffer_T */
#ifdef PMAP
@@ -460,7 +478,7 @@ static struct option long_options[] = {
#endif
{"expand-offsets", required_argument, 0, 0}, /* expand_offsets_p */
{"min-intronlength", required_argument, 0, 0}, /* min_intronlength */
- {"intronlength", required_argument, 0, 'K'}, /* maxintronlen_bound */
+ {"intronlength", required_argument, 0, 'K'}, /* maxintronlen */
{"totallength", required_argument, 0, 'L'}, /* maxtotallen_bound */
{"chimera-margin", required_argument, 0, 'x'}, /* chimera_margin */
{"no-chimeras", no_argument, 0, 0}, /* chimera_margin */
@@ -499,16 +517,16 @@ static struct option long_options[] = {
{"continuous-by-exon", no_argument, 0, '4'}, /* printtype */
{"noexceptions", no_argument, 0, '0'}, /* exception_raise_p */
{"graphic", no_argument, 0, '6'}, /* debug_graphic_p */
- {"stage3debug", required_argument, 0, '8'}, /* stage3debug, diagnosticp */
+ {"stage3debug", required_argument, 0, '8'}, /* stage3debug */
{"diagnostic", no_argument, 0, '9'}, /* checkp */
- {"npaths", required_argument, 0, 'n'}, /* maxpaths */
+ {"npaths", required_argument, 0, 'n'}, /* maxpaths_report */
#if 0
{"quiet-if-excessive", no_argument, 0, 0}, /* quiet_if_excessive_p */
#endif
{"format", required_argument, 0, 'f'}, /* printtype */
{"failsonly", no_argument, 0, 0}, /* failsonlyp */
{"nofails", no_argument, 0, 0}, /* nofailsp */
- {"split-output", required_argument, 0, 0}, /* sevenway_root */
+ {"split-output", required_argument, 0, 0}, /* split_output_root */
{"failed-input", required_argument, 0, 0}, /* failedinput_root */
{"append-output", no_argument, 0, 0}, /* appendp */
{"suboptimal-score", required_argument, 0, 0}, /* suboptimal_score */
@@ -810,7 +828,7 @@ evaluate_query (bool *poorp, bool *repetitivep, char *queryuc_ptr, int queryleng
Oligoindex_set_inquery(&diagnostic->query_badoligos,&diagnostic->query_repoligos,
&diagnostic->query_trimoligos,&diagnostic->query_trim_start,
&diagnostic->query_trim_end,oligoindex,queryuc_ptr,
- querylength,/*trimp*/true);
+ /*querystart*/0,/*queryend*/querylength,/*trimp*/true);
debug2(printf("query_trimoligos %d, fraction badoligos %f = %d/%d, oligodepth %f, fraction repoligos %f = %d/%d\n",
diagnostic->query_trimoligos,
@@ -964,7 +982,7 @@ stage3array_from_list (int *npaths, int *first_absmq, int *second_absmq, List_T
static List_T
-update_stage3list (List_T stage3list, bool lowidentityp, Sequence_T queryseq,
+update_stage3list (List_T stage3list, Sequence_T queryseq,
#ifdef PMAP
Sequence_T queryntseq,
#endif
@@ -975,7 +993,6 @@ update_stage3list (List_T stage3list, bool lowidentityp, Sequence_T queryseq,
Chrpos_T chrstart, Chrpos_T chrend, bool watsonp, int genestrand,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Stopwatch_T worker_stopwatch) {
- bool do_final_p;
int stage2_source, stage2_indexsize;
#ifdef PMAP
@@ -997,9 +1014,11 @@ update_stage3list (List_T stage3list, bool lowidentityp, Sequence_T queryseq,
int nmatches_posttrim, max_match_length, ambig_end_length_5, ambig_end_length_3;
Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
double ambig_prob_5, ambig_prob_3;
- double defect_rate, min_splice_prob;
+ double min_splice_prob;
double stage3_runtime;
+#ifdef PMAP
int subseq_offset;
+#endif
#ifdef PMAP_OLD
@@ -1021,6 +1040,7 @@ update_stage3list (List_T stage3list, bool lowidentityp, Sequence_T queryseq,
genomicuc_ptr = Sequence_fullpointer(genomicuc);
#endif
+#if 0
if (canonical_mode == 0) {
do_final_p = false;
} else if (canonical_mode == 1) {
@@ -1030,6 +1050,7 @@ update_stage3list (List_T stage3list, bool lowidentityp, Sequence_T queryseq,
} else {
do_final_p = true;
}
+#endif
debug2(printf("Beginning Stage2_compute with chrstart %u and chrend %u and query_subseq_offset %d\n",
chrstart,chrend,Sequence_subseq_offset(queryseq)));
@@ -1038,10 +1059,10 @@ update_stage3list (List_T stage3list, bool lowidentityp, Sequence_T queryseq,
Sequence_trimlength(queryseq),/*query_offset*/0,
chrstart,chrend,chroffset,chrhigh,/*plusp*/watsonp,genestrand,
stage2_alloc,oligoindices_major,/*proceed_pctcoverage*/0.3,
- pairpool,diagpool,cellpool,sufflookback,nsufflookback,maxintronlen_bound,
+ pairpool,diagpool,cellpool,
/*localp*/true,/*skip_repetitive_p*/true,
/*favor_right_p*/false,/*max_nalignments*/MAX_NALIGNMENTS,debug_graphic_p,
- diagnosticp,worker_stopwatch,diag_debug);
+ worker_stopwatch,diag_debug);
debug(printf("End of Stage2_compute\n"));
@@ -1057,7 +1078,8 @@ update_stage3list (List_T stage3list, bool lowidentityp, Sequence_T queryseq,
&ambig_end_length_5,&ambig_end_length_3,
&ambig_splicetype_5,&ambig_splicetype_3,
&ambig_prob_5,&ambig_prob_3,&unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
- &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,stage2,
+ &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
+ Stage2_middle(stage2),Stage2_all_starts(stage2),Stage2_all_ends(stage2),
#ifdef PMAP
/*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
/*queryseq_ptr*/Sequence_subseq_pointer(queryntseq,subseq_offset),
@@ -1075,14 +1097,9 @@ update_stage3list (List_T stage3list, bool lowidentityp, Sequence_T queryseq,
chrnum,chroffset,chrhigh,
/*knownsplice_limit_low*/0U,/*knownsplice_limit_high*/-1U,
watsonp,genestrand,/*jump_late_p*/watsonp ? false : true,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extramaterial_paired,
- extraband_single,extraband_end,extraband_paired,
- minendexon,pairpool,dynprogL,dynprogM,dynprogR,ngap,
- diagnosticp,checkp,do_final_p,sense_try,sense_filter,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,close_indels_mode,
- /*paired_favor_mode*/0,/*zero_offset*/0);
+
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ sense_try,sense_filter,oligoindices_minor,diagpool,cellpool);
stage3_runtime = Stopwatch_stop(worker_stopwatch);
if (pairarray == NULL) {
/* Skip */
@@ -1113,32 +1130,6 @@ update_stage3list (List_T stage3list, bool lowidentityp, Sequence_T queryseq,
return stage3list;
}
-#if 0
-static List_T
-update_stage3list_maponlyp (List_T stage3list, Gregion_T gregion, Sequence_T queryseq,
-#ifdef PMAP
- Sequence_T queryntseq,
-#endif
- Sequence_T queryuc, Pairpool_T pairpool, int straintype, char *strain, Genome_T genome,
- Chrnum_T chrnum, Univcoord_T chroffset, Chrpos_T chrpos, Chrpos_T chrlength,
- bool watsonp, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR) {
- Stage3_T stage3;
-
- if ((stage3 = Stage3_direct(gregion,
-#ifdef PMAP
- queryseq,queryntseq,queryntseq,
-#else
- queryseq,queryuc,
-#endif
- pairpool,genome,chrnum,chroffset,chrpos,watsonp,ngap,
- dynprogL,dynprogR,extramaterial_end,extraband_end)) != NULL) {
- stage3list = List_push(stage3list,stage3);
- }
-
- return stage3list;
-}
-#endif
-
#if 0
/* This code is duplicated in get-genome.c */
@@ -1176,7 +1167,7 @@ index_compare (const void *a, const void *b) {
static Stage3_T *
stage3_from_usersegment (int *npaths, int *first_absmq, int *second_absmq,
- bool lowidentityp, Sequence_T queryseq, Sequence_T queryuc,
+ Sequence_T queryseq, Sequence_T queryuc,
#ifdef PMAP
Sequence_T queryntseq,
#endif
@@ -1197,7 +1188,7 @@ stage3_from_usersegment (int *npaths, int *first_absmq, int *second_absmq,
chroffset = chrpos = 0U;
chrhigh = chrlength = Sequence_fulllength(usersegment);
- stage3list = update_stage3list(/*stage3list*/NULL,lowidentityp,queryseq,
+ stage3list = update_stage3list(/*stage3list*/NULL,queryseq,
#ifdef PMAP
queryntseq,
#endif
@@ -1211,7 +1202,7 @@ stage3_from_usersegment (int *npaths, int *first_absmq, int *second_absmq,
revcomp = Sequence_revcomp(usersegment);
#endif
- stage3list = update_stage3list(stage3list,lowidentityp,queryseq,
+ stage3list = update_stage3list(stage3list,queryseq,
#ifdef PMAP
queryntseq,
#endif
@@ -1348,7 +1339,7 @@ stage3list_sort (List_T stage3list) {
static List_T
stage3list_filter_and_sort (Chimera_T *chimera, List_T stage3list) {
List_T sorted = NULL;
- Stage3_T *array, stage3, from, to;
+ Stage3_T *array, stage3;
int n, i;
if ((n = List_length(stage3list)) == 0) {
@@ -1422,7 +1413,7 @@ stage3list_filter_and_sort (Chimera_T *chimera, List_T stage3list) {
static List_T
-stage3_from_gregions (List_T stage3list, List_T gregions, bool lowidentityp,
+stage3_from_gregions (List_T stage3list, List_T gregions,
Sequence_T queryseq, Sequence_T queryuc,
#ifdef PMAP
Sequence_T queryntseq,
@@ -1433,7 +1424,6 @@ stage3_from_gregions (List_T stage3list, List_T gregions, bool lowidentityp,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Stopwatch_T worker_stopwatch) {
Gregion_T gregion, *array;
- char *strain;
int ngregions, ncovered, max_ncovered, stage2_source;
int i;
#if 0
@@ -1463,11 +1453,8 @@ stage3_from_gregions (List_T stage3list, List_T gregions, bool lowidentityp,
Gregion_chrstart(gregion),Gregion_chrend(gregion),
Gregion_chroffset(gregion),Gregion_chrhigh(gregion),
/*plusp*/Gregion_revcompp(gregion) ? false : true,Gregion_genestrand(gregion),
- stage2_alloc,oligoindices_major,diagpool,debug_graphic_p,diagnosticp);
+ stage2_alloc,oligoindices_major,diagpool,debug_graphic_p);
Gregion_set_ncovered(gregion,ncovered,stage2_source);
- if (diagnosticp == true) {
- fprintf(stderr,"Scanned %d ncovered\n",ncovered);
- }
#if defined(EXTRACT_GENOMICSEG)
Sequence_free(&genomicuc);
Sequence_free(&genomicseg);
@@ -1489,9 +1476,6 @@ stage3_from_gregions (List_T stage3list, List_T gregions, bool lowidentityp,
while (i < ngregions && Gregion_ncovered(array[i]) > 0.25*max_ncovered) {
debug(printf("Keeping %d ncovered relative to %d\n",Gregion_ncovered(array[i]),max_ncovered));
gregions = List_push(gregions,(void *) array[i]);
- if (diagnosticp == true) {
- fprintf(stderr,"Keeping %d ncovered relative to %d\n",Gregion_ncovered(array[i]),max_ncovered);
- }
i++;
}
while (i < ngregions) {
@@ -1510,8 +1494,8 @@ stage3_from_gregions (List_T stage3list, List_T gregions, bool lowidentityp,
if (1) {
if (usersegment != NULL) {
/* chrlength = Sequence_fulllength(usersegment); */
- strain = NULL;
- stage3list = update_stage3list(stage3list,lowidentityp,queryseq,
+ /* strain = NULL; */
+ stage3list = update_stage3list(stage3list,queryseq,
#ifdef PMAP
queryntseq,
#endif
@@ -1522,29 +1506,8 @@ stage3_from_gregions (List_T stage3list, List_T gregions, bool lowidentityp,
Gregion_chrstart(gregion),Gregion_chrend(gregion),
Gregion_plusp(gregion),Gregion_genestrand(gregion),
dynprogL,dynprogM,dynprogR,worker_stopwatch);
- } else if (maponlyp == true) {
- fprintf(stderr,"maponlyp mode not currently supported\n");
- exit(9);
-#if 0
- stage3list = update_stage3list_maponlyp(stage3list,gregion,queryseq,
-#ifdef PMAP
- queryntseq,
-#endif
- queryuc,pairpool,/*straintype*/0,/*strain*/NULL,genome,
- Gregion_chrnum(gregion),Gregion_chroffset(gregion),
- Gregion_chrpos(gregion),Gregion_chrlength(gregion),Gregion_plusp(gregion),
- dynprogL,dynprogM,dynprogR);
-#endif
-
} else {
-#if 0
- if (diagnosticp == true) {
- printf("Got sequence at %u with length %u, revcomp %d\n",
- Gregion_genomicstart(gregion),Gregion_genomiclength(gregion),Gregion_revcompp(gregion));
- }
-#endif
-
- stage3list = update_stage3list(stage3list,lowidentityp,queryseq,
+ stage3list = update_stage3list(stage3list,queryseq,
#ifdef PMAP
queryntseq,
#endif
@@ -1568,7 +1531,7 @@ stage3_from_gregions (List_T stage3list, List_T gregions, bool lowidentityp,
while (j < nindices) {
i = j++;
straintype = Interval_type(IIT_interval(altstrain_iit,indexarray[i]));
- strain = IIT_typestring(altstrain_iit,straintype);
+ /* strain = IIT_typestring(altstrain_iit,straintype); */
while (j < nindices && Interval_type(IIT_interval(altstrain_iit,indexarray[j])) == straintype) {
j++;
}
@@ -1578,7 +1541,7 @@ stage3_from_gregions (List_T stage3list, List_T gregions, bool lowidentityp,
Gregion_revcompp(gregion),
Gbuffer_chars1(gbuffer),Gbuffer_chars2(gbuffer),Gbuffer_chars3(gbuffer),
Gbuffer_gbufferlen(gbuffer));
- stage3list = update_stage3list(stage3list,lowidentityp,queryseq,
+ stage3list = update_stage3list(stage3list,queryseq,
#ifdef PMAP
queryntseq,
#endif
@@ -1777,7 +1740,7 @@ local_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1,
to = by_querystart[j];
if (Chimera_local_join_p(from,to,CHIMERA_SLOP) == true) {
- debug2(printf("Found join from %d to %d\n",i,j));
+ debug2(printf("Found local join from %d to %d\n",i,j));
Stage3_set_joinable_left(from);
Stage3_set_joinable_right(to);
}
@@ -1794,9 +1757,11 @@ local_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1,
for (p = stage3list; p != NULL; p = List_next(p)) {
stage3 = (Stage3_T) List_head(p);
if (Stage3_joinable_left_p(stage3) == true) {
+ debug2(printf("Putting stage3 %p into local sub1\n",stage3));
(*npaths_sub1)++;
}
if (Stage3_joinable_right_p(stage3) == true) {
+ debug2(printf("Putting stage3 %p into local sub2\n",stage3));
(*npaths_sub2)++;
}
}
@@ -1881,7 +1846,7 @@ distant_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1,
to = by_querystart[j];
if (Chimera_distant_join_p(from,to,CHIMERA_SLOP) == true) {
- debug2(printf("Found join from %d to %d\n",i,j));
+ debug2(printf("Found distant join from %d to %d\n",i,j));
Stage3_set_joinable_left(from);
Stage3_set_joinable_right(to);
}
@@ -1921,9 +1886,11 @@ distant_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1,
} else {
/* Note: it is possible that the same stage3 object gets put into both lists */
if (Stage3_joinable_left_p(stage3) == true) {
+ debug2(printf("Putting stage3 %p into distant sub1\n",stage3));
(*stage3array_sub1)[j++] = stage3;
}
if (Stage3_joinable_right_p(stage3) == true) {
+ debug2(printf("Putting stage3 %p into distant sub2\n",stage3));
(*stage3array_sub2)[k++] = stage3;
}
}
@@ -1942,10 +1909,9 @@ merge_left_and_right_readthrough (bool *mergedp, Stage3_T *stage3array_sub1, int
#ifdef PMAP
char *queryaaseq_ptr,
#endif
- Sequence_T queryseq, char *queryseq_ptr, char *queryuc_ptr,
+ char *queryseq_ptr, char *queryuc_ptr,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
- int ngap) {
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
List_T newstage3list, p;
Stage3_T best0, best1, *array, last, freed0 = NULL, freed1 = NULL;
int i, k;
@@ -1985,12 +1951,9 @@ merge_left_and_right_readthrough (bool *mergedp, Stage3_T *stage3array_sub1, int
#ifdef PMAP
queryaaseq_ptr,
#endif
- queryseq,queryseq_ptr,queryuc_ptr,
+ queryseq_ptr,queryuc_ptr,
pairpool,dynprogL,dynprogM,dynprogR,
- maxpeelback,nullgap,oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,
- extramaterial_paired,extraband_paired,extraband_single,ngap,
- /*paired_favor_mode*/0,/*zero_offset*/0) == false) {
+ maxpeelback,oligoindices_minor,diagpool,cellpool) == false) {
newstage3list = (List_T) NULL;
newstage3list = List_push(newstage3list,(void *) best0);
@@ -2246,7 +2209,7 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
/*queryseq_ptr*/Sequence_fullpointer(queryseq),
/*queryuc_ptr*/Sequence_fullpointer(queryuc),
#endif
- max_extend_p,pairpool,ngap,maxpeelback_from);
+ max_extend_p,pairpool,maxpeelback_from);
Stage3_extend_left(to,/*goal*/leftpos,
#ifdef PMAP
@@ -2256,7 +2219,7 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
/*queryseq_ptr*/Sequence_fullpointer(queryseq),
/*queryuc_ptr*/Sequence_fullpointer(queryuc),
#endif
- max_extend_p,pairpool,ngap,maxpeelback_to);
+ max_extend_p,pairpool,maxpeelback_to);
debug2(printf("Before Chimera_find_breakpoint, bestfrom is %p, query %d..%d\n",
from,Stage3_querystart(from),Stage3_queryend(from)));
@@ -2317,13 +2280,12 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
int npaths_sub1 = 0, npaths_sub2 = 0;
bool lowidentityp, poorp, repetitivep;
- int max_single_goodness, chimeric_goodness, penalty, matches0, matches1;
+ int max_single_goodness;
int breakpoint, chimerapos, chimeraequivpos, exonexonpos;
- int cdna_direction, chimera_cdna_direction;
+ int chimera_cdna_direction;
char donor1, donor2, acceptor2, acceptor1;
bool donor_watsonp, acceptor_watsonp;
double donor_prob, acceptor_prob;
- char comp;
#ifdef PMAP
@@ -2401,7 +2363,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
chrsubset_start,chrsubset_end,matchpool,
stutterhits,diagnostic,/*worker_stopwatch*/NULL,/*nbest*/10);
debug2(printf("A. Performing Stage 3 starting with list length %d\n",List_length(stage3list)));
- stage3list = stage3_from_gregions(stage3list,gregions,lowidentityp,querysubseq,querysubuc,
+ stage3list = stage3_from_gregions(stage3list,gregions,querysubseq,querysubuc,
#ifdef PMAP
queryntseq,
#endif
@@ -2443,7 +2405,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
chrsubset_start,chrsubset_end,matchpool,
stutterhits,diagnostic,/*worker_stopwatch*/NULL,/*nbest*/10);
debug2(printf("B. Performing Stage 3 starting with list length %d\n",List_length(stage3list)));
- stage3list = stage3_from_gregions(stage3list,gregions,lowidentityp,querysubseq,querysubuc,
+ stage3list = stage3_from_gregions(stage3list,gregions,querysubseq,querysubuc,
#ifdef PMAP
queryntseq,
#endif
@@ -2494,7 +2456,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
debug2(printf("5 margin <= 3 margin. "));
debug2(printf("Beginning Stage1_compute on 3' margin from effective_end %d (%d..%d) (extension %d)\n",
effective_end,effective_end-extension,queryntlength,extension));
- debug2(Sequence_print(stdout,querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
+ debug2(Sequence_stdout(querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(querysubuc),Sequence_fulllength(querysubuc),
Oligoindex_array_elt(oligoindices_major,0));
@@ -2506,7 +2468,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
chrsubset_start,chrsubset_end,matchpool,
stutterhits,diagnostic,/*worker_stopwatch*/NULL,/*nbest*/10);
debug2(printf("C. Performing Stage 3 with list length %d\n",List_length(stage3list)));
- stage3list = stage3_from_gregions(stage3list,gregions,lowidentityp,querysubseq,querysubuc,
+ stage3list = stage3_from_gregions(stage3list,gregions,querysubseq,querysubuc,
#ifdef PMAP
queryntseq,
#endif
@@ -2536,7 +2498,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
debug2(printf("Recomputing on original part. "));
debug2(printf("Beginning Stage1_compute on 3' margin from effective_end %d (%d..%d), extension %d\n",
effective_end,0,effective_end,extension));
- debug2(Sequence_print(stdout,querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
+ debug2(Sequence_stdout(querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(querysubuc),Sequence_fulllength(querysubuc),
Oligoindex_array_elt(oligoindices_major,0));
@@ -2548,7 +2510,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
chrsubset_start,chrsubset_end,matchpool,
stutterhits,diagnostic,/*worker_stopwatch*/NULL,/*nbest*/10);
debug2(printf("D. Performing Stage 3 with list length %d\n",List_length(stage3list)));
- stage3list = stage3_from_gregions(stage3list,gregions,lowidentityp,querysubseq,querysubuc,
+ stage3list = stage3_from_gregions(stage3list,gregions,querysubseq,querysubuc,
#ifdef PMAP
queryntseq,
#endif
@@ -2618,7 +2580,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
debug2(printf("Before Stage3_mergeable, bestto is %p, query %d..%d\n",
to,Stage3_querystart(to),Stage3_queryend(to)));
- if (Stage3_mergeable(from,to,breakpoint,queryntlength,maxintronlen_bound) == true) {
+ if (Stage3_mergeable(from,to,breakpoint,queryntlength) == true) {
debug2(printf("Mergeable! -- Merging left and right as a readthrough\n"));
List_free(&stage3list);
stage3list = merge_left_and_right_readthrough(&(*mergedp),stage3array_sub1,npaths_sub1,bestfrom,
@@ -2626,16 +2588,14 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
nonjoinable,breakpoint,queryntlength,
#ifdef PMAP
/*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
- queryntseq,
/*queryseq_ptr*/Sequence_fullpointer(queryntseq),
/*queryuc_ptr*/Sequence_fullpointer(queryntseq),
#else
- queryseq,
/*queryseq_ptr*/Sequence_fullpointer(queryseq),
/*queryuc_ptr*/Sequence_fullpointer(queryuc),
#endif
pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,ngap);
+ oligoindices_minor,diagpool,cellpool);
}
FREE(stage3array_sub2);
@@ -2681,11 +2641,10 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
int max_single_goodness, chimeric_goodness, penalty, matches0, matches1;
int breakpoint, chimerapos, chimeraequivpos, exonexonpos;
- int cdna_direction, chimera_cdna_direction;
+ int chimera_cdna_direction;
char donor1, donor2, acceptor2, acceptor1;
bool donor_watsonp, acceptor_watsonp;
double donor_prob, acceptor_prob;
- char comp;
#ifdef PMAP
@@ -2746,7 +2705,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
debug2(printf("5 margin > 3 margin. "));
debug2(printf("Beginning Stage1_compute on 5' margin from effective_start %d (%d..%d)\n",
effective_start,0,effective_start+extension));
- debug2a(Sequence_print(stdout,querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
+ debug2a(Sequence_stdout(querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(querysubuc),Sequence_fulllength(querysubuc),
Oligoindex_array_elt(oligoindices_major,0));
@@ -2758,7 +2717,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
chrsubset_start,chrsubset_end,matchpool,
stutterhits,diagnostic,/*worker_stopwatch*/NULL,/*nbest*/10);
debug2(printf("A. Performing Stage 3 starting with list length %d\n",List_length(stage3list)));
- stage3list = stage3_from_gregions(stage3list,gregions,lowidentityp,querysubseq,querysubuc,
+ stage3list = stage3_from_gregions(stage3list,gregions,querysubseq,querysubuc,
#ifdef PMAP
queryntseq,
#endif
@@ -2788,7 +2747,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
debug2(printf("Recomputing on original part. "));
debug2(printf("Beginning Stage1_compute on 5' margin from effective_start %d (%d..%d)\n",
effective_start,effective_start,queryntlength));
- debug2a(Sequence_print(stdout,querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
+ debug2a(Sequence_stdout(querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(querysubuc),Sequence_fulllength(querysubuc),
Oligoindex_array_elt(oligoindices_major,0));
@@ -2800,7 +2759,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
chrsubset_start,chrsubset_end,matchpool,
stutterhits,diagnostic,/*worker_stopwatch*/NULL,/*nbest*/10);
debug2(printf("B. Performing Stage 3 starting with list length %d\n",List_length(stage3list)));
- stage3list = stage3_from_gregions(stage3list,gregions,lowidentityp,querysubseq,querysubuc,
+ stage3list = stage3_from_gregions(stage3list,gregions,querysubseq,querysubuc,
#ifdef PMAP
queryntseq,
#endif
@@ -2846,7 +2805,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
debug2(printf("5 margin <= 3 margin. "));
debug2(printf("Beginning Stage1_compute on 3' margin from effective_end %d (%d..%d)\n",
effective_end,effective_end-extension,queryntlength));
- debug2(Sequence_print(stdout,querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
+ debug2(Sequence_stdout(querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(querysubuc),Sequence_fulllength(querysubuc),
Oligoindex_array_elt(oligoindices_major,0));
@@ -2858,7 +2817,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
chrsubset_start,chrsubset_end,matchpool,
stutterhits,diagnostic,/*worker_stopwatch*/NULL,/*nbest*/10);
debug2(printf("C. Performing Stage 3 with list length %d\n",List_length(stage3list)));
- stage3list = stage3_from_gregions(stage3list,gregions,lowidentityp,querysubseq,querysubuc,
+ stage3list = stage3_from_gregions(stage3list,gregions,querysubseq,querysubuc,
#ifdef PMAP
queryntseq,
#endif
@@ -2888,7 +2847,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
debug2(printf("Recomputing on original part. "));
debug2(printf("Beginning Stage1_compute on 3' margin from effective_end %d (%d..%d)\n",
effective_end,0,effective_end));
- debug2(Sequence_print(stdout,querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
+ debug2(Sequence_stdout(querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(querysubuc),Sequence_fulllength(querysubuc),
Oligoindex_array_elt(oligoindices_major,0));
@@ -2900,7 +2859,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
chrsubset_start,chrsubset_end,matchpool,
stutterhits,diagnostic,/*worker_stopwatch*/NULL,/*nbest*/10);
debug2(printf("D. Performing Stage 3 with list length %d\n",List_length(stage3list)));
- stage3list = stage3_from_gregions(stage3list,gregions,lowidentityp,querysubseq,querysubuc,
+ stage3list = stage3_from_gregions(stage3list,gregions,querysubseq,querysubuc,
#ifdef PMAP
queryntseq,
#endif
@@ -2943,6 +2902,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
stage3array_sub1,npaths_sub1,stage3array_sub2,npaths_sub2,queryntlength,
CHIMERA_SLOP,/*localp*/false) == false) {
/* Skip */
+ debug2(printf("Chimera_bestpath returns false, so skipping\n"));
FREE(stage3array_sub2);
FREE(stage3array_sub1);
@@ -2982,7 +2942,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
debug2(printf("Before Stage3_mergeable, bestto is %p, query %d..%d\n",
to,Stage3_querystart(to),Stage3_queryend(to)));
- if (Stage3_mergeable(from,to,breakpoint,queryntlength,maxintronlen_bound) == false &&
+ if (Stage3_mergeable(from,to,breakpoint,queryntlength) == false &&
Stage3_test_bounds(from,0,chimeraequivpos+chimera_overlap) == true &&
Stage3_test_bounds(to,chimerapos+1-chimera_overlap,queryntlength) == true &&
Stage3_merge_chimera(/*best0*/from,/*best1*/to,
@@ -2993,15 +2953,14 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
#else
Sequence_fullpointer(queryseq),Sequence_fullpointer(queryuc),
#endif
- pairpool,dynprogL,dynprogR,maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,ngap) == true) {
+ pairpool,dynprogL,dynprogR,maxpeelback) == true) {
- /* if maxpaths == 1, then don't want distant chimeras */
- if (maxpaths != 1) {
+ /* if maxpaths_report == 1, then don't want distant chimeras */
+ if (maxpaths_report != 1) {
+ debug2(printf("Not mergeable -- Merging left and right as a transloc\n"));
*chimera = Chimera_new(from,to,chimerapos,chimeraequivpos,exonexonpos,chimera_cdna_direction,
donor1,donor2,acceptor2,acceptor1,donor_watsonp,acceptor_watsonp,
donor_prob,acceptor_prob);
- debug2(printf("Not mergeable -- Merging left and right as a transloc\n"));
List_free(&stage3list);
debug2(printf("Before merge_left_and_right_transloc, bestfrom is %p, query %d..%d\n",
@@ -3044,8 +3003,7 @@ merge_middlepieces (List_T stage3list, Stage3_T from, Stage3_T to,
#endif
Sequence_T queryuc, int queryntlength,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
- int ngap) {
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
List_T newstage3list = NULL, merged;
List_T nonjoinable, r;
bool mergedAp, mergedBp;
@@ -3070,16 +3028,14 @@ merge_middlepieces (List_T stage3list, Stage3_T from, Stage3_T to,
/*nonjoinable*/NULL,breakpointA,queryntlength,
#ifdef PMAP
/*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
- queryntseq,
/*queryseq_ptr*/Sequence_fullpointer(queryntseq),
/*queryuc_ptr*/Sequence_fullpointer(queryntseq),
#else
- queryseq,
/*queryseq_ptr*/Sequence_fullpointer(queryseq),
/*queryuc_ptr*/Sequence_fullpointer(queryuc),
#endif
pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,ngap);
+ oligoindices_minor,diagpool,cellpool);
List_free(&merged);
newstage3list = merge_left_and_right_readthrough(&mergedBp,/*stage3array_sub1*/&from,/*npaths_sub1*/1,/*bestfrom*/0,
@@ -3087,16 +3043,14 @@ merge_middlepieces (List_T stage3list, Stage3_T from, Stage3_T to,
nonjoinable,breakpointB,queryntlength,
#ifdef PMAP
/*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
- queryntseq,
/*queryseq_ptr*/Sequence_fullpointer(queryntseq),
/*queryuc_ptr*/Sequence_fullpointer(queryntseq),
#else
- queryseq,
/*queryseq_ptr*/Sequence_fullpointer(queryseq),
/*queryuc_ptr*/Sequence_fullpointer(queryuc),
#endif
pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,ngap);
+ oligoindices_minor,diagpool,cellpool);
#ifndef PMAP
Stage3_guess_cdna_direction(from);
@@ -3110,16 +3064,14 @@ merge_middlepieces (List_T stage3list, Stage3_T from, Stage3_T to,
nonjoinable,breakpointB,queryntlength,
#ifdef PMAP
/*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
- queryntseq,
/*queryseq_ptr*/Sequence_fullpointer(queryntseq),
/*queryuc_ptr*/Sequence_fullpointer(queryntseq),
#else
- queryseq,
/*queryseq_ptr*/Sequence_fullpointer(queryseq),
/*queryuc_ptr*/Sequence_fullpointer(queryuc),
#endif
pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,ngap);
+ oligoindices_minor,diagpool,cellpool);
#ifndef PMAP
Stage3_guess_cdna_direction(middle);
#endif
@@ -3132,16 +3084,14 @@ merge_middlepieces (List_T stage3list, Stage3_T from, Stage3_T to,
nonjoinable,breakpointA,queryntlength,
#ifdef PMAP
/*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
- queryntseq,
/*queryseq_ptr*/Sequence_fullpointer(queryntseq),
/*queryuc_ptr*/Sequence_fullpointer(queryntseq),
#else
- queryseq,
/*queryseq_ptr*/Sequence_fullpointer(queryseq),
/*queryuc_ptr*/Sequence_fullpointer(queryuc),
#endif
pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,ngap);
+ oligoindices_minor,diagpool,cellpool);
#ifndef PMAP
Stage3_guess_cdna_direction(from);
#endif
@@ -3175,9 +3125,9 @@ check_middle_piece_local (bool *foundp, List_T stage3list, Sequence_T queryseq,
#ifdef PMAP
Sequence_T queryntseq,
#endif
- int queryntlength, Sequence_T usersegment, Stage2_alloc_T stage2_alloc,
+ int queryntlength, Stage2_alloc_T stage2_alloc,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Matchpool_T matchpool, Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR) {
Sequence_T querysubseq = NULL, querysubuc = NULL;
int npaths, i, j;
@@ -3249,7 +3199,7 @@ check_middle_piece_local (bool *foundp, List_T stage3list, Sequence_T queryseq,
if ((querysubuc = Sequence_subsequence(queryuc,querystart,queryend)) != NULL) {
debug2(printf("Performing Stage 3 on %d..%d against %u..%u\n",
querystart,queryend,chrstart,chrend));
- if ((middlepieces = update_stage3list(/*stage3list*/NULL,/*lowidentityp*/false,querysubseq,
+ if ((middlepieces = update_stage3list(/*stage3list*/NULL,querysubseq,
#ifdef PMAP
queryntseq,
#endif
@@ -3285,8 +3235,8 @@ check_middle_piece_local (bool *foundp, List_T stage3list, Sequence_T queryseq,
queryseq,queryuc,queryntlength,
genomecomp,genomecomp_alt,chromosome_iit,pairpool);
- mergeableAp = Stage3_mergeable(from,/*to*/middle,breakpointA,queryntlength,maxintronlen_bound);
- mergeableBp = Stage3_mergeable(/*from*/middle,to,breakpointB,queryntlength,maxintronlen_bound);
+ mergeableAp = Stage3_mergeable(from,/*to*/middle,breakpointA,queryntlength);
+ mergeableBp = Stage3_mergeable(/*from*/middle,to,breakpointB,queryntlength);
}
r = List_next(r);
} /* End of while loop looking for dual merge */
@@ -3320,8 +3270,8 @@ check_middle_piece_local (bool *foundp, List_T stage3list, Sequence_T queryseq,
queryseq,queryuc,queryntlength,
genomecomp,genomecomp_alt,chromosome_iit,pairpool);
- mergeableAp = Stage3_mergeable(from,/*to*/middle,breakpointA,queryntlength,maxintronlen_bound);
- mergeableBp = Stage3_mergeable(/*from*/middle,to,breakpointB,queryntlength,maxintronlen_bound);
+ mergeableAp = Stage3_mergeable(from,/*to*/middle,breakpointA,queryntlength);
+ mergeableBp = Stage3_mergeable(/*from*/middle,to,breakpointB,queryntlength);
}
r = List_next(r);
} /* End of while loop looking for single merge */
@@ -3337,7 +3287,7 @@ check_middle_piece_local (bool *foundp, List_T stage3list, Sequence_T queryseq,
queryntseq,
#endif
queryuc,queryntlength,pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,ngap);
+ oligoindices_minor,diagpool,cellpool);
List_free(&middlepieces);
}
@@ -3375,13 +3325,11 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
int querystart, queryend, maxdist, dist;
int breakpointA, chimeraposA, chimeraequivposA, exonexonposA;
- int cdna_direction_A;
char donorA1, donorA2, acceptorA2, acceptorA1;
bool donor_watsonp_A, acceptor_watsonp_A;
double donor_prob_A, acceptor_prob_A;
int breakpointB, chimeraposB, chimeraequivposB, exonexonposB;
- int cdna_direction_B;
char donorB1, donorB2, acceptorB2, acceptorB1;
bool donor_watsonp_B, acceptor_watsonp_B;
double donor_prob_B, acceptor_prob_B;
@@ -3460,7 +3408,7 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
chrsubset_start,chrsubset_end,matchpool,
stutterhits,diagnostic,/*worker_stopwatch*/NULL,/*nbest*/10);
debug2(printf("Performing Stage 3 starting with list length %d\n",List_length(stage3list)));
- middlepieces = stage3_from_gregions(/*stage3list*/NULL,gregions,lowidentityp,querysubseq,querysubuc,
+ middlepieces = stage3_from_gregions(/*stage3list*/NULL,gregions,querysubseq,querysubuc,
#ifdef PMAP
queryntseq,
#endif
@@ -3495,7 +3443,7 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
#endif
queryseq,queryuc,queryntlength,
genomecomp,genomecomp_alt,chromosome_iit,pairpool);
- mergeableAp = Stage3_mergeable(bestfrom,/*to*/middle,breakpointA,queryntlength,maxintronlen_bound);
+ mergeableAp = Stage3_mergeable(bestfrom,/*to*/middle,breakpointA,queryntlength);
}
if (Chimera_local_join_p(middle,bestto,CHIMERA_SLOP) == true) {
breakpointB = find_breakpoint(&chimera_cdna_direction_B,&chimeraposB,&chimeraequivposB,&exonexonposB,
@@ -3507,7 +3455,7 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
#endif
queryseq,queryuc,queryntlength,
genomecomp,genomecomp_alt,chromosome_iit,pairpool);
- mergeableBp = Stage3_mergeable(/*from*/middle,bestto,breakpointB,queryntlength,maxintronlen_bound);
+ mergeableBp = Stage3_mergeable(/*from*/middle,bestto,breakpointB,queryntlength);
}
}
r = List_next(r);
@@ -3515,7 +3463,7 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
if (mergeableAp == true) {
debug2(printf("Middle segment found and mergeable locally with from! -- Merging as a readthrough. cdna_direction = %d\n",
- cdna_direction_A));
+ chimera_cdna_direction_A));
List_free(&nonjoinable);
nonjoinable = (List_T) NULL;
@@ -3544,20 +3492,16 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
nonjoinable,breakpointA,queryntlength,
#ifdef PMAP
/*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
- queryntseq,
/*queryseq_ptr*/Sequence_fullpointer(queryntseq),
/*queryuc_ptr*/Sequence_fullpointer(queryntseq),
#else
- queryseq,
/*queryseq_ptr*/Sequence_fullpointer(queryseq),
/*queryuc_ptr*/Sequence_fullpointer(queryuc),
#endif
pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,ngap);
+ oligoindices_minor,diagpool,cellpool);
#ifndef PMAP
- if (cdna_direction_A == 0) {
- Stage3_guess_cdna_direction(from);
- }
+ Stage3_guess_cdna_direction(from);
#endif
List_free(&nonjoinable);
@@ -3567,7 +3511,7 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
} else if (mergeableBp == true) {
debug2(printf("Middle segment found and mergeable locally with to! -- Merging as a readthrough. cdna_direction = %d\n",
- cdna_direction_B));
+ chimera_cdna_direction_B));
List_free(&nonjoinable);
nonjoinable = (List_T) NULL;
@@ -3596,21 +3540,17 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
nonjoinable,breakpointB,queryntlength,
#ifdef PMAP
/*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
- queryntseq,
/*queryseq_ptr*/Sequence_fullpointer(queryntseq),
/*queryuc_ptr*/Sequence_fullpointer(queryntseq),
#else
- queryseq,
/*queryseq_ptr*/Sequence_fullpointer(queryseq),
/*queryuc_ptr*/Sequence_fullpointer(queryuc),
#endif
pairpool,dynprogL,dynprogM,dynprogR,
- oligoindices_minor,diagpool,cellpool,ngap);
+ oligoindices_minor,diagpool,cellpool);
#ifndef PMAP
- if (cdna_direction_B == 0) {
- Stage3_guess_cdna_direction(middle);
- }
+ Stage3_guess_cdna_direction(middle);
#endif
List_free(&nonjoinable);
@@ -3643,7 +3583,7 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
static List_T
-apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, bool lowidentityp, Sequence_T queryseq, Sequence_T queryuc,
+apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T queryseq, Sequence_T queryuc,
#ifdef PMAP
Sequence_T queryntseq,
#endif
@@ -3666,7 +3606,7 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, bool lowidenti
*chimera = NULL;
debug(printf("Calling stage3_from_gregions\n"));
- stage3list = stage3_from_gregions(/*stage3list*/(List_T) NULL,gregions,lowidentityp,queryseq,queryuc,
+ stage3list = stage3_from_gregions(/*stage3list*/(List_T) NULL,gregions,queryseq,queryuc,
#ifdef PMAP
queryntseq,
#endif
@@ -3759,8 +3699,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, bool lowidenti
#ifdef PMAP
queryntseq,
#endif
- queryntlength,usersegment,stage2_alloc,oligoindices_major,oligoindices_minor,
- matchpool,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
+ queryntlength,stage2_alloc,oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
if (foundp == true) {
/* Iterate */
testlocalp = true;
@@ -3797,7 +3737,7 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, bool lowidenti
debug2(printf("turned off\n"));
testchimerap = false;
- } else if (maxpaths == 1) {
+ } else if (maxpaths_report == 1) {
debug2(printf("maxpaths set to 1\n"));
testchimerap = false;
@@ -3836,6 +3776,7 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, bool lowidenti
#endif
queryntlength,usersegment,stage2_alloc,oligoindices_major,oligoindices_minor,
matchpool,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
+ debug2(printf("chimera is %p\n",*chimera));
if (*chimera != NULL) {
testchimerap = false;
} else {
@@ -3895,11 +3836,13 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, bool lowidenti
}
-static Result_T
-process_request (Request_T request, Matchpool_T matchpool, Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+static Filestring_T
+process_request (Filestring_T *fp_failedinput, double *worker_runtime, Request_T request, Sequence_T usersegment,
+ Matchpool_T matchpool, Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Stage2_alloc_T stage2_alloc, Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Stopwatch_T worker_stopwatch) {
+ Filestring_T fp;
Result_T result;
int jobid;
Diagnostic_T diagnostic;
@@ -3923,6 +3866,10 @@ process_request (Request_T request, Matchpool_T matchpool, Pairpool_T pairpool,
Cellpool_reset(cellpool);
+ if (worker_stopwatch != NULL) {
+ Stopwatch_start(worker_stopwatch);
+ }
+
if (Sequence_fulllength_given(queryseq) <= 0) {
result = Result_new(jobid,/*mergedp*/false,(Chimera_T) NULL,(Stage3_T *) NULL,
/*npaths*/0,/*first_absmq*/0,/*second_absmq*/0,/*diagnostic*/NULL,EMPTY_SEQUENCE);
@@ -3943,28 +3890,20 @@ process_request (Request_T request, Matchpool_T matchpool, Pairpool_T pairpool,
queryntseq = Sequence_convert_to_nucleotides(queryseq);
#endif
- if (maponlyp == true) {
- diagnostic = Diagnostic_new();
- diagnostic->query_trim_start = 0;
- diagnostic->query_trim_end = Sequence_fulllength(queryseq);
- } else {
- diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(queryuc),
- Sequence_fulllength(queryuc),Oligoindex_array_elt(oligoindices_major,0));
- }
+ diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(queryuc),
+ Sequence_fulllength(queryuc),Oligoindex_array_elt(oligoindices_major,0));
#ifndef PMAP
if (poorp == true && prune_poor_p == true) {
result = Result_new(jobid,/*mergedp*/false,(Chimera_T) NULL,(Stage3_T *) NULL,
/*npaths*/0,/*first_absmq*/0,/*second_absmq*/0,diagnostic,POOR_SEQUENCE);
-
} else if (repetitivep == true && prune_repetitive_p == true) {
result = Result_new(jobid,/*mergedp*/false,(Chimera_T) NULL,(Stage3_T *) NULL,
/*npaths*/0,/*first_absmq*/0,/*second_absmq*/0,diagnostic,REPETITIVE);
-
}
#endif
- if (usersegment != NULL && userstage1p == false) {
+ if (usersegment != NULL) {
#ifndef PMAP
#if 0
/* Don't do Sequence_trim, because it affects sequences like NM_018406 */
@@ -3972,7 +3911,7 @@ process_request (Request_T request, Matchpool_T matchpool, Pairpool_T pairpool,
Sequence_trim(queryuc,diagnostic->query_trim_start,diagnostic->query_trim_end);
#endif
#endif
- stage3array = stage3_from_usersegment(&npaths,&first_absmq,&second_absmq,/*lowidentityp*/false,queryseq,queryuc,
+ stage3array = stage3_from_usersegment(&npaths,&first_absmq,&second_absmq,queryseq,queryuc,
#ifdef PMAP
queryntseq,
#endif
@@ -4007,7 +3946,7 @@ process_request (Request_T request, Matchpool_T matchpool, Pairpool_T pairpool,
result = Result_new_stage1debug(jobid,gregions,diagnostic,NO_FAILURE);
} else {
debug(printf("Applying stage 3\n"));
- stage3list = apply_stage3(&mergedp,&chimera,gregions,lowidentityp,queryseq,queryuc,
+ stage3list = apply_stage3(&mergedp,&chimera,gregions,queryseq,queryuc,
#ifdef PMAP
queryntseq,
#endif
@@ -4032,7 +3971,7 @@ process_request (Request_T request, Matchpool_T matchpool, Pairpool_T pairpool,
}
Oligoindex_clear_inquery(Oligoindex_array_elt(oligoindices_major,0),/*queryuc_ptr*/Sequence_fullpointer(queryuc),
- /*querylength*/Sequence_fulllength(queryuc));
+ /*querystart*/0,/*queryend*/Sequence_fulllength(queryuc));
} /* Matches not user segment and not maponly */
@@ -4042,7 +3981,11 @@ process_request (Request_T request, Matchpool_T matchpool, Pairpool_T pairpool,
Sequence_free(&queryuc);
} /* Matches sequence length > 0 */
- return result;
+ fp = Output_filestring_fromresult(&(*fp_failedinput),result,request,
+ /*headerseq*/user_pairalign_p == true ? usersegment : queryseq);
+ *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
+ Result_free(&result);
+ return fp;
}
@@ -4054,28 +3997,284 @@ static const Except_T misc_signal_error = {"Miscellaneous signal"};
static void
signal_handler (int sig) {
- if (sig == SIGUSR1) {
-#ifdef HAVE_PTHREAD
- pthread_exit(NULL);
-#else
- exit(9);
+ Request_T request;
+ Sequence_T queryseq;
+
+ switch (sig) {
+ case SIGABRT: fprintf(stderr,"Signal received: SIGABRT\n"); break;
+ case SIGFPE: fprintf(stderr,"Signal received: SIGFPE\n"); break;
+ case SIGHUP: fprintf(stderr,"Signal received: SIGHUP\n"); break;
+ case SIGILL:
+ fprintf(stderr,"Signal received: SIGILL\n");
+ fprintf(stderr,"An illegal instruction means that this program is being run on a computer\n");
+ fprintf(stderr," with different features than the computer used to compile the program\n");
+ fprintf(stderr,"You may need to re-compile the program with fewer features by doing something like\n");
+ fprintf(stderr," ./configure --disable-simd\n");
+ break;
+ case SIGINT: fprintf(stderr,"Signal received: SIGINT\n"); break;
+ case SIGPIPE: fprintf(stderr,"Signal received: SIGPIPE\n"); break;
+ case SIGQUIT: fprintf(stderr,"Signal received: SIGQUIT\n"); break;
+ case SIGSEGV: fprintf(stderr,"Signal received: SIGSEGV\n"); break;
+ case SIGSYS: fprintf(stderr,"Signal received: SIGSYS\n"); break;
+ case SIGTERM: fprintf(stderr,"Signal received: SIGTERM\n"); break;
+ case SIGTRAP: fprintf(stderr,"Signal received: SIGTRAP\n"); break;
+ case SIGXCPU: fprintf(stderr,"Signal received: SIGXCPU\n"); break;
+ case SIGXFSZ: fprintf(stderr,"Signal received: SIGXFSZ\n"); break;
+ }
+
+ Access_emergency_cleanup();
+
+#ifdef USE_MPI
+ MPI_Barrier(MPI_COMM_WORLD);
#endif
- } else if (sig == SIGFPE) {
- Except_raise(&sigfpe_error,__FILE__,__LINE__);
- } else if (sig == SIGSEGV) {
- Except_raise(&sigsegv_error,__FILE__,__LINE__);
- } else if (sig == SIGTRAP) {
- Except_raise(&sigtrap_error,__FILE__,__LINE__);
+
+#ifdef HAVE_PTHREAD
+ request = (Request_T) pthread_getspecific(global_request_key);
+ if (request == NULL) {
+ /* fprintf(stderr,"Unable to retrieve request for thread\n"); */
} else {
- fprintf(stderr,"Signal %d\n",sig);
- Except_raise(&misc_signal_error,__FILE__,__LINE__);
+ queryseq = Request_queryseq(request);
+ if (queryseq == NULL) {
+ fprintf(stderr,"Unable to retrieve queryseq for request\n");
+ } else {
+ fprintf(stderr,"Problem sequence: ");
+ fprintf(stderr,"%s (%d bp)\n",Sequence_accession(queryseq),Sequence_fulllength(queryseq));
+ }
}
+#endif
+
+ exit(9);
+
return;
}
#endif
+
#define POOL_FREE_INTERVAL 200
+#ifdef USE_MPI
+/*
+ * Worker loop for one MPI process (only compiled when USE_MPI is defined).
+ *
+ *   worker_id  identifier of this worker; used here only for diagnostic
+ *              output and for the MEMUSAGE thread name
+ *   inbuffer   input buffer from which query sequences are read
+ *
+ * Message flow, as implemented below:
+ *   1. Send filestringid == -1 to rank 0 with tag MPI_TAG_FILESTRING_AVAIL
+ *      to announce that this worker is ready.
+ *   2. Receive a request id from rank 0.  Advance through the input with
+ *      skipping reads until nread reaches that id, then read the query
+ *      itself.  If the input ends first, leave the loop.
+ *   3. Run process_request() and send back the filestring id, followed by
+ *      the filestring (and the failed-input filestring when
+ *      failedinput_root is set).
+ *   4. After the loop, send a final filestringid == -1 and free all
+ *      per-worker storage.
+ *
+ * An MPI_SSEND failure, or an exception raised inside process_request(),
+ * terminates the process with exit(9).
+ */
+static void
+worker_mpi_process (int worker_id, Inbuffer_T inbuffer) {
+  bool donep = false;
+  int nread = 0;
+  MPI_Status status;
+
+  Stage2_alloc_T stage2_alloc;
+  Oligoindex_array_T oligoindices_major, oligoindices_minor;
+  Dynprog_T dynprogL, dynprogM, dynprogR;
+  Matchpool_T matchpool;
+  Pairpool_T pairpool;
+  Diagpool_T diagpool;
+  Cellpool_T cellpool;
+  Stopwatch_T worker_stopwatch;
+  Request_T request;
+  Filestring_T fp, fp_failedinput;
+  Sequence_T queryseq, usersegment, pairalign_segment;
+  int filestringid, requestid;
+  int ret;
+  int worker_jobid = 0;
+  double worker_runtime;
+
+#ifdef MEMUSAGE
+  long int memusage_constant = 0, memusage;
+  /* "proc-" plus the widest possible int plus the terminator fits easily */
+  char procname[32];
+  char acc[100+1], comma0[20], comma1[20], comma2[20], comma3[20], comma4[20], comma5[20];
+  /* worker_id is an int, so it must be formatted with %d, not %ld */
+  sprintf(procname,"proc-%d",worker_id);
+  Mem_usage_set_threadname(procname);
+#endif
+
+  stage2_alloc = Stage2_alloc_new(MAX_QUERYLENGTH_FOR_ALLOC);
+  oligoindices_major = Oligoindex_array_new_major(MAX_QUERYLENGTH_FOR_ALLOC,MAX_GENOMICLENGTH_FOR_ALLOC);
+  oligoindices_minor = Oligoindex_array_new_minor(MAX_QUERYLENGTH_FOR_ALLOC,MAX_GENOMICLENGTH_FOR_ALLOC);
+  dynprogL = Dynprog_new(nullgap,EXTRAQUERYGAP,maxpeelback,extramaterial_end,extramaterial_paired,
+			 /*doublep*/true);
+  dynprogM = Dynprog_new(nullgap,EXTRAQUERYGAP,maxpeelback,extramaterial_end,extramaterial_paired,
+			 /*doublep*/false);
+  dynprogR = Dynprog_new(nullgap,EXTRAQUERYGAP,maxpeelback,extramaterial_end,extramaterial_paired,
+			 /*doublep*/true);
+  matchpool = Matchpool_new();
+  pairpool = Pairpool_new();
+  diagpool = Diagpool_new();
+  cellpool = Cellpool_new();
+  /* The stopwatch exists only when timing was requested; otherwise it stays NULL */
+  worker_stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL;
+
+  usersegment = global_usersegment;
+
+  /* Except_stack_create(); -- no worker threads, so no need to store request in global_request_key */
+
+#ifdef MEMUSAGE
+  memusage_constant += Mem_usage_report_std_heap();
+  Genomicpos_commafmt_fill(comma0,memusage_constant);
+  Mem_usage_reset_heap_baseline(0);
+#endif
+
+  /* Initial message to say that we are ready for a request */
+  filestringid = -1;
+
+  /* Use a synchronized send here to make sure outbuffer is ready */
+  if ((ret = MPI_SSEND(&filestringid,1,MPI_INT,/*dest*/0,/*tag*/MPI_TAG_FILESTRING_AVAIL,MPI_COMM_WORLD)) != 0) {
+    fprintf(stderr,"MPI_SSEND returns error %d\n",ret);
+    MPI_Finalize();
+    exit(9);
+  }
+
+  while (donep == false) {
+    MPI_RECV(&requestid,1,MPI_INT,/*source*/0,/*tag*/MPI_ANY_TAG,MPI_COMM_WORLD,&status);
+    debugm(printf("worker_id %d got request %d\n",worker_id,requestid));
+
+    /* Skip over the entries that precede the requested one */
+    while (nread < requestid &&
+	   (queryseq = Inbuffer_read(&pairalign_segment,inbuffer,/*skipp*/true)) != NULL) {
+      /* No need to free queryseq */
+      nread++;
+    }
+
+    if (nread < requestid) {
+      /* Input ended before the requested entry was reached */
+      debugm(printf("because nread %d < requestid %d, worker_id %d is done\n",nread,requestid,worker_id));
+      donep = true;
+    } else if ((queryseq = Inbuffer_read(&pairalign_segment,inbuffer,/*skipp*/false)) == NULL) {
+      debugm(printf("because final read is NULL, worker_id %d is done\n",worker_id));
+      donep = true;
+    } else {
+      debugm(printf("worker_id %d starting to process request %d\n",worker_id,requestid));
+      request = Request_new(requestid,queryseq);
+      nread++;
+
+      if (user_pairalign_p == true) {
+	/* NOTE(review): the blocks below are built from the current value of
+	   usersegment, and usersegment is switched to pairalign_segment only
+	   afterwards -- confirm that this ordering is intended */
+	genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment));
+	genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment));
+	Genome_user_setup(genomecomp_blocks);
+	Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false,
+			     /*genome_unk_mismatch_p*/true,/*mode*/STANDARD);
+	Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL);
+	Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks);
+#ifdef PMAP
+	Oligoindex_pmap_setup(genomecomp);
+#else
+	Oligoindex_hr_setup(genomecomp_blocks,mode);
+#endif
+	usersegment = pairalign_segment;
+      }
+
+#ifdef MEMUSAGE
+      queryseq = Request_queryseq(request);
+      fprintf(stderr,"Proc %d starting %s\n",worker_id,Sequence_accession(queryseq));
+      Mem_usage_reset_stack_max();
+      Mem_usage_reset_heap_max();
+#endif
+
+      TRY
+	fp = process_request(&fp_failedinput,&worker_runtime,request,usersegment,
+			     matchpool,pairpool,diagpool,cellpool,
+			     stage2_alloc,oligoindices_major,oligoindices_minor,
+			     dynprogL,dynprogM,dynprogR,worker_stopwatch);
+
+      ELSE
+	/* Report which sequence triggered the exception, then terminate */
+	queryseq = Request_queryseq(request);
+	if (Sequence_accession(queryseq) == NULL) {
+	  fprintf(stderr,"Problem with unnamed sequence (%d bp)\n",Sequence_fulllength_given(queryseq));
+	} else {
+	  fprintf(stderr,"Problem with sequence %s (%d bp)\n",
+		  Sequence_accession(queryseq),Sequence_fulllength_given(queryseq));
+	}
+	fprintf(stderr,"To obtain a core dump, re-run program on problem sequence with the -0 [zero] flag\n");
+	fprintf(stderr,"Exiting...\n");
+	exit(9);
+      RERAISE;
+      END_TRY;
+
+      if (user_pairalign_p == true) {
+	FREE(genomebits_blocks);
+	FREE(genomecomp_blocks);
+      }
+
+      filestringid = Filestring_id(fp);
+      debugm(printf("worker proc %d sending filestring %d...",worker_id,filestringid));
+
+      /* Use a synchronized send here to make sure outbuffer is ready */
+      if ((ret = MPI_SSEND(&filestringid,1,MPI_INT,/*dest*/0,/*tag*/MPI_TAG_FILESTRING_AVAIL,MPI_COMM_WORLD)) != 0) {
+	fprintf(stderr,"MPI_SSEND returns error %d\n",ret);
+	MPI_Finalize();
+	exit(9);
+      }
+      Filestring_Send(fp,/*dest*/0,/*tag*/MPI_TAG_DEFAULT,MPI_COMM_WORLD);
+      if (failedinput_root != NULL) {
+	Filestring_Send(fp_failedinput,/*dest*/0,/*tag*/MPI_TAG_DEFAULT,MPI_COMM_WORLD);
+      }
+      debugm(printf("done with filestring %d\n",filestringid));
+
+      /* NOTE(review): worker_jobid is never incremented in this function, so
+	 this test is true for every request and the pools are trimmed each
+	 time -- confirm whether a counter increment was intended */
+      if (worker_jobid % POOL_FREE_INTERVAL == 0) {
+	Pairpool_free_memory(pairpool);
+	Diagpool_free_memory(diagpool);
+	Cellpool_free_memory(cellpool);
+	Matchpool_free_memory(matchpool);
+      }
+
+#ifdef MEMUSAGE
+      /* Copy acc before we free the request.  Uses the same accessors as the
+	 "starting" message above; the accession may be NULL (see the
+	 exception handler), so substitute a placeholder in that case */
+      queryseq = Request_queryseq(request);
+      strncpy(acc,(Sequence_accession(queryseq) != NULL) ? Sequence_accession(queryseq) : "(unnamed)",100);
+      acc[100] = '\0';
+#endif
+
+      Request_free(&request);
+
+#ifdef MEMUSAGE
+      Genomicpos_commafmt_fill(comma1,Mem_usage_report_std_heap_max());
+      Genomicpos_commafmt_fill(comma2,Mem_usage_report_std_heap());
+      Genomicpos_commafmt_fill(comma3,Mem_usage_report_keep());
+      Genomicpos_commafmt_fill(comma4,Mem_usage_report_in());
+      Genomicpos_commafmt_fill(comma5,Mem_usage_report_out());
+
+      fprintf(stderr,"Acc %s, proc %d: constant %s max %s std %s keep %s in %s out %s\n",
+	      acc,worker_id,comma0,comma1,comma2,comma3,comma4,comma5);
+
+      if ((memusage = Mem_usage_report_std_heap()) != 0) {
+	fprintf(stderr,"Memory leak in proc %d of %ld bytes\n",worker_id,memusage);
+	fflush(stdout);
+	MPI_Finalize();
+	exit(9);
+      }
+#endif
+    }
+  }
+
+  /* Final message to say that we are done with all requests */
+  debugm(printf("worker_id %d sending final message to say it is done\n",worker_id));
+  filestringid = -1;
+  if ((ret = MPI_SSEND(&filestringid,1,MPI_INT,/*dest*/0,/*tag*/MPI_TAG_FILESTRING_AVAIL,MPI_COMM_WORLD)) != 0) {
+    fprintf(stderr,"MPI_SSEND returns error %d\n",ret);
+    MPI_Finalize();
+    exit(9);
+  }
+
+#ifdef MEMUSAGE
+  Mem_usage_std_heap_add(memusage_constant);
+#endif
+
+  /* Except_stack_destroy(); */
+
+  /* worker_stopwatch is NULL unless timingp is true, so guard the free as
+     single_thread and worker_thread do */
+  if (worker_stopwatch != NULL) {
+    Stopwatch_free(&worker_stopwatch);
+  }
+  Cellpool_free(&cellpool);
+  Diagpool_free(&diagpool);
+  Pairpool_free(&pairpool);
+  Matchpool_free(&matchpool);
+  Dynprog_free(&dynprogR);
+  Dynprog_free(&dynprogM);
+  Dynprog_free(&dynprogL);
+  Oligoindex_array_free(&oligoindices_minor);
+  Oligoindex_array_free(&oligoindices_major);
+  Stage2_alloc_free(&stage2_alloc);
+
+#ifdef MEMUSAGE
+  Mem_usage_set_threadname("main");
+#endif
+
+  debugm(printf("worker_id %d is now returning\n",worker_id));
+  return;
+}
+#endif
+
+
static void
single_thread () {
Stage2_alloc_T stage2_alloc;
@@ -4087,10 +4286,17 @@ single_thread () {
Cellpool_T cellpool;
Stopwatch_T worker_stopwatch;
Request_T request;
- Result_T result;
+ Sequence_T usersegment, pairalign_segment;
+ Filestring_T fp, fp_failedinput;
Sequence_T queryseq;
int noutput = 0;
int jobid = 0;
+ double worker_runtime;
+
+#ifdef MEMUSAGE
+ long int memusage_constant = 0;
+ char acc[100+1], comma0[20], comma1[20], comma2[20], comma3[20], comma4[20], comma5[20];
+#endif
stage2_alloc = Stage2_alloc_new(MAX_QUERYLENGTH_FOR_ALLOC);
oligoindices_major = Oligoindex_array_new_major(MAX_QUERYLENGTH_FOR_ALLOC,MAX_GENOMICLENGTH_FOR_ALLOC);
@@ -4105,9 +4311,20 @@ single_thread () {
pairpool = Pairpool_new();
diagpool = Diagpool_new();
cellpool = Cellpool_new();
- worker_stopwatch = diagnosticp == true ? Stopwatch_new() : (Stopwatch_T) NULL;
+ worker_stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL;
+
+ usersegment = global_usersegment;
+
+ /* Except_stack_create(); -- requires pthreads */
+
+#ifdef MEMUSAGE
+ memusage_constant += Mem_usage_report_std_heap();
+ Genomicpos_commafmt_fill(comma0,memusage_constant);
+ Mem_usage_reset_heap_baseline(0);
+#endif
+
+ while ((request = Inbuffer_get_request(&pairalign_segment,inbuffer)) != NULL) {
- while ((request = Inbuffer_get_request(&usersegment,inbuffer,user_pairalign_p)) != NULL) {
if (user_pairalign_p == true) {
genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment));
genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment));
@@ -4121,18 +4338,26 @@ single_thread () {
#else
Oligoindex_hr_setup(genomecomp_blocks,mode);
#endif
+ usersegment = pairalign_segment;
}
- if (jobid % POOL_FREE_INTERVAL == 0) {
- Pairpool_free_memory(pairpool);
- Diagpool_free_memory(diagpool);
- Cellpool_free_memory(cellpool);
- Matchpool_free_memory(matchpool);
- }
+#ifdef MEMUSAGE
+ queryseq1 = Request_queryseq1(request);
+ fprintf(stderr,"Single thread starting %s\n",Shortread_accession(queryseq1));
+ Mem_usage_reset_stack_max();
+ Mem_usage_reset_heap_max();
+#endif
+
TRY
- result = process_request(request,matchpool,pairpool,diagpool,cellpool,
- stage2_alloc,oligoindices_major,oligoindices_minor,
- dynprogL,dynprogM,dynprogR,worker_stopwatch);
+ fp = process_request(&fp_failedinput,&worker_runtime,request,usersegment,
+ matchpool,pairpool,diagpool,cellpool,
+ stage2_alloc,oligoindices_major,oligoindices_minor,
+ dynprogL,dynprogM,dynprogR,worker_stopwatch);
+ if (timingp == true) {
+ queryseq = Request_queryseq(request);
+ printf("%s\t%.6f\n",Sequence_accession(queryseq),worker_runtime);
+ }
+
ELSE
queryseq = Request_queryseq(request);
if (Sequence_accession(queryseq) == NULL) {
@@ -4147,33 +4372,56 @@ single_thread () {
RERAISE;
END_TRY;
-#ifdef MEMUSAGE
if (user_pairalign_p == true) {
- Outbuffer_print_result(outbuffer,result,request,/*headerseq*/usersegment,noutput+1);
- } else {
- Outbuffer_print_result(outbuffer,result,request,/*headerseq*/Request_queryseq(request),
- noutput+1);
+ FREE(genomebits_blocks);
+ FREE(genomecomp_blocks);
}
- Mem_usage_reset_stack_max();
- Mem_usage_reset_heap_max();
-#else
- if (user_pairalign_p == true) {
- Outbuffer_print_result(outbuffer,result,request,/*headerseq*/usersegment);
- } else {
- Outbuffer_print_result(outbuffer,result,request,/*headerseq*/Request_queryseq(request));
+
+ Outbuffer_print_filestrings(fp,fp_failedinput);
+
+ if (jobid % POOL_FREE_INTERVAL == 0) {
+ Pairpool_free_memory(pairpool);
+ Diagpool_free_memory(diagpool);
+ Cellpool_free_memory(cellpool);
+ Matchpool_free_memory(matchpool);
}
+
+#ifdef MEMUSAGE
+ /* Copy acc before we free the request */
+ queryseq1 = Request_queryseq1(request);
+ strncpy(acc,Shortread_accession(queryseq1),100);
+ acc[100] = '\0';
#endif
- Result_free(&result);
+
Request_free(&request);
- noutput++;
- if (user_pairalign_p == true) {
- FREE(genomebits_blocks);
- FREE(genomecomp_blocks);
+#ifdef MEMUSAGE
+ Genomicpos_commafmt_fill(comma1,Mem_usage_report_std_heap_max());
+ Genomicpos_commafmt_fill(comma2,Mem_usage_report_std_heap());
+ Genomicpos_commafmt_fill(comma3,Mem_usage_report_keep());
+ Genomicpos_commafmt_fill(comma4,Mem_usage_report_in());
+ Genomicpos_commafmt_fill(comma5,Mem_usage_report_out());
+
+ fprintf(stderr,"Acc %s: constant %s max %s std %s keep %s in %s out %s\n",
+ acc,comma0,comma1,comma2,comma3,comma4,comma5);
+
+ if ((memusage = Mem_usage_report_std_heap()) != 0) {
+ fprintf(stderr,"Memory leak in single thread of %ld bytes\n",memusage);
+ fflush(stdout);
+ exit(9);
}
+#endif
}
- Stopwatch_free(&worker_stopwatch);
+#ifdef MEMUSAGE
+ Mem_usage_std_heap_add(memusage_constant);
+#endif
+
+ /* Except_stack_destroy(); -- requires pthreads */
+
+ if (worker_stopwatch != NULL) {
+ Stopwatch_free(&worker_stopwatch);
+ }
Cellpool_free(&cellpool);
Diagpool_free(&diagpool);
Pairpool_free(&pairpool);
@@ -4185,6 +4433,10 @@ single_thread () {
Oligoindex_array_free(&oligoindices_major);
Stage2_alloc_free(&stage2_alloc);
+#ifdef MEMUSAGE
+ Mem_usage_set_threadname("main");
+#endif
+
return;
}
@@ -4201,9 +4453,21 @@ worker_thread (void *data) {
Cellpool_T cellpool;
Stopwatch_T worker_stopwatch;
Request_T request;
- Result_T result;
- Sequence_T queryseq;
- int jobid = 0;
+ Filestring_T fp, fp_failedinput;
+ Sequence_T queryseq, usersegment, pairalign_segment;
+ int worker_jobid = 0;
+ double worker_runtime;
+#if defined(DEBUG) || defined(MEMUSAGE)
+ long int worker_id = (long int) data;
+#endif
+
+#ifdef MEMUSAGE
+ long int memusage_constant = 0, memusage, max_memusage;
+ char threadname[12];
+ char acc[100+1], comma0[20], comma1[20], comma2[20], comma3[20], comma4[20], comma5[20];
+ sprintf(threadname,"thread-%ld",worker_id);
+ Mem_usage_set_threadname(threadname);
+#endif
/* Thread-specific data and storage */
stage2_alloc = Stage2_alloc_new(MAX_QUERYLENGTH_FOR_ALLOC);
@@ -4219,22 +4483,55 @@ worker_thread (void *data) {
pairpool = Pairpool_new();
diagpool = Diagpool_new();
cellpool = Cellpool_new();
- worker_stopwatch = diagnosticp == true ? Stopwatch_new() : (Stopwatch_T) NULL;
+ worker_stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL;
+
+ usersegment = global_usersegment;
Except_stack_create();
- while ((request = Inbuffer_get_request(&usersegment,inbuffer,user_pairalign_p)) != NULL) {
- if (jobid % POOL_FREE_INTERVAL == 0) {
- Pairpool_free_memory(pairpool);
- Diagpool_free_memory(diagpool);
- Cellpool_free_memory(cellpool);
- Matchpool_free_memory(matchpool);
+#ifdef MEMUSAGE
+ memusage_constant += Mem_usage_report_std_heap();
+ Genomicpos_commafmt_fill(comma0,memusage_constant);
+ Mem_usage_reset_heap_baseline(0);
+#endif
+
+ while ((request = Inbuffer_get_request(&pairalign_segment,inbuffer)) != NULL) {
+ debug(printf("worker_thread %ld got request %d\n",worker_id,Request_id(request)));
+ pthread_setspecific(global_request_key,(void *) request);
+
+ if (user_pairalign_p == true) {
+ genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment));
+ genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment));
+ Genome_user_setup(genomecomp_blocks);
+ Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false,
+ /*genome_unk_mismatch_p*/true,/*mode*/STANDARD);
+ Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL);
+ Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks);
+#ifdef PMAP
+ Oligoindex_pmap_setup(genomecomp);
+#else
+ Oligoindex_hr_setup(genomecomp_blocks,mode);
+#endif
+ usersegment = pairalign_segment;
}
+#ifdef MEMUSAGE
+ queryseq = Request_queryseq(request);
+ fprintf(stderr,"Thread %d starting %s\n",worker_id,Sequence_accession(queryseq));
+ Mem_usage_reset_stack_max();
+ Mem_usage_reset_heap_max();
+#endif
+
TRY
- result = process_request(request,matchpool,pairpool,diagpool,cellpool,
- stage2_alloc,oligoindices_major,oligoindices_minor,
- dynprogL,dynprogM,dynprogR,worker_stopwatch);
+ fp = process_request(&fp_failedinput,&worker_runtime,request,usersegment,
+ matchpool,pairpool,diagpool,cellpool,
+ stage2_alloc,oligoindices_major,oligoindices_minor,
+ dynprogL,dynprogM,dynprogR,worker_stopwatch);
+ if (timingp == true) {
+ queryseq = Request_queryseq(request);
+ printf("%s\t%.6f\n",Sequence_accession(queryseq),worker_runtime);
+ }
+
ELSE
queryseq = Request_queryseq(request);
if (queryseq == NULL) {
@@ -4249,17 +4546,60 @@ worker_thread (void *data) {
fprintf(stderr,"Exiting...\n");
exit(9);
-
RERAISE;
END_TRY;
- Outbuffer_put_result(outbuffer,result,request);
- /* Don't free result or request; done by outbuffer thread */
+ if (user_pairalign_p == true) {
+ FREE(genomebits_blocks);
+ FREE(genomecomp_blocks);
+ }
+
+ debug(printf("worker_thread %ld putting filestring %d\n",worker_id,Filestring_id(fp)));
+ Outbuffer_put_filestrings(outbuffer,fp,fp_failedinput);
+
+ if (worker_jobid % POOL_FREE_INTERVAL == 0) {
+ Pairpool_free_memory(pairpool);
+ Diagpool_free_memory(diagpool);
+ Cellpool_free_memory(cellpool);
+ Matchpool_free_memory(matchpool);
+ }
+
+#ifdef MEMUSAGE
+ /* Copy acc before we free the request */
+ queryseq1 = Request_queryseq1(request);
+ strncpy(acc,Shortread_accession(queryseq1),100);
+ acc[100] = '\0';
+#endif
+
+ Request_free(&request);
+
+#ifdef MEMUSAGE
+ Genomicpos_commafmt_fill(comma1,Mem_usage_report_std_heap_max());
+ Genomicpos_commafmt_fill(comma2,Mem_usage_report_std_heap());
+ Genomicpos_commafmt_fill(comma3,Mem_usage_report_keep());
+ Genomicpos_commafmt_fill(comma4,Mem_usage_report_in());
+ Genomicpos_commafmt_fill(comma5,Mem_usage_report_out());
+
+ fprintf(stderr,"Acc %s, thread %d: constant %s max %s std %s keep %s in %s out %s\n",
+ acc,worker_id,comma0,comma1,comma2,comma3,comma4,comma5);
+
+ if ((memusage = Mem_usage_report_std_heap()) != 0) {
+ fprintf(stderr,"Memory leak in worker thread %ld of %ld bytes\n",worker_id,memusage);
+ fflush(stdout);
+ exit(9);
+ }
+#endif
}
+#ifdef MEMUSAGE
+ Mem_usage_std_heap_add(memusage_constant);
+#endif
+
Except_stack_destroy();
- Stopwatch_free(&worker_stopwatch);
+ if (worker_stopwatch != NULL) {
+ Stopwatch_free(&worker_stopwatch);
+ }
Cellpool_free(&cellpool);
Diagpool_free(&diagpool);
Pairpool_free(&pairpool);
@@ -4271,6 +4611,10 @@ worker_thread (void *data) {
Oligoindex_array_free(&oligoindices_major);
Stage2_alloc_free(&stage2_alloc);
+#ifdef MEMUSAGE
+ Mem_usage_set_threadname("main");
+#endif
+
return (void *) NULL;
}
#endif
@@ -4313,7 +4657,7 @@ align_relative (FILE *input, char **files, int nfiles, int nextchar,
pairpool = Pairpool_new();
diagpool = Diagpool_new();
cellpool = Cellpool_new();
- stopwatch = diagnosticp == true ? Stopwatch_new() : (Stopwatch_T) NULL;
+ stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL;
Matchpool_reset(matchpool);
Pairpool_reset(pairpool);
@@ -4337,7 +4681,7 @@ align_relative (FILE *input, char **files, int nfiles, int nextchar,
/*indexdb_size_threshold*/100,chromosome_iit,
chrsubset_start,chrsubet_end,matchpool,
stutterhits,diagnostic,/*stopwatch*/NULL);
- stage3list = apply_stage3(&chimera,gregions,lowidentityp,referenceseq,referenceuc,/*usersegment*/NULL,
+ stage3list = apply_stage3(&chimera,gregions,referenceseq,referenceuc,/*usersegment*/NULL,
oligoindices_major,oligoindices_minor,
matchpool,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,stopwatch);
if (stage3list == NULL) {
@@ -4372,7 +4716,7 @@ align_relative (FILE *input, char **files, int nfiles, int nextchar,
genomicseg = Genome_get_segment(genome,genomicstart,genomiclength,chromosome_iit,/*revcomp*/false);
}
- while (jobid == 0 || (queryseq = Sequence_read_multifile(&nextchar,&input,&files,&nfiles,maponlyp)) != NULL) {
+ while (jobid == 0 || (queryseq = Sequence_read_multifile(&nextchar,&input,&files,&nfiles)) != NULL) {
Matchpool_reset(matchpool);
Pairpool_reset(pairpool);
Diagpool_reset(diagpool);
@@ -4406,7 +4750,8 @@ align_relative (FILE *input, char **files, int nfiles, int nextchar,
Oligoindex_set_inquery(&diagnostic->query_badoligos,&diagnostic->query_repoligos,
&diagnostic->query_trimoligos,&diagnostic->query_trim_start,
&diagnostic->query_trim_end,Oligoindex_array_elt(oligoindices_major,0),
- Sequence_fullpointer(queryuc),Sequence_fulllength(queryuc),/*trimp*/true);
+ Sequence_fullpointer(queryuc),/*querystart*/0,/*queryend*/Sequence_fulllength(queryuc),
+ /*trimp*/true);
if (diagnostic->query_trimoligos == 0) {
poorp = true;
@@ -4434,7 +4779,7 @@ align_relative (FILE *input, char **files, int nfiles, int nextchar,
print_npaths(fp,0,diagnostic,/*usersegment*/NULL,chrsubset,/*chimera*/NULL,REPETITIVE);
} else {
#endif /* PMAP */
- stage3array = stage3_from_usersegment(&npaths,lowidentityp,queryseq,queryuc,genomicseg,
+ stage3array = stage3_from_usersegment(&npaths,queryseq,queryuc,genomicseg,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,stopwatch);
@@ -4451,7 +4796,7 @@ align_relative (FILE *input, char **files, int nfiles, int nextchar,
#endif
Stage3_fix_cdna_direction(stage3array[0],stage3ref);
Stage3_print_mutations(stage3array[0],stage3ref,chromosome_iit,queryseq,
- dbversion,printtype,diagnosticp,proteinmode,
+ dbversion,printtype,proteinmode,
invertmode,nointronlenp,wraplength,
/*snps_p*/snp_blocks ? true : false,
/*maxmutations*/1000000);
@@ -4671,43 +5016,16 @@ check_valid_float (char *string, const char *option) {
}
-int
-main (int argc, char *argv[]) {
- Sequence_T referenceseq = NULL;
- char *genomesubdir = NULL, *snpsdir = NULL, *modedir = NULL, *mapdir = NULL, *iitfile = NULL, *fileroot = NULL, *p = NULL;
- FILE *input = NULL;
- Request_T request;
-
- int divno;
- Univinterval_T interval;
-
- int user_ngap = -1;
- bool showcontigp = true, multiple_sequences_p = false;
- char **files;
- int nfiles;
- unsigned int nread;
- double runtime;
-
- Splicestringpool_T splicestringpool;
-
-#ifdef HAVE_PTHREAD
- int ret, i;
- pthread_attr_t thread_attr_join;
-#ifdef WORKER_DETACH
- pthread_attr_t thread_attr_detach;
-#endif
-#endif
-
- int opt, len, c;
- extern int optind;
+static int
+parse_command_line (int argc, char *argv[], int optind) {
+ int opt, c;
extern char *optarg;
int long_option_index = 0;
const char *long_name;
char **argstart;
-#ifdef HAVE_SIGACTION
- struct sigaction signal_action;
-#endif
+ int len;
+ int user_ngap = -1;
fprintf(stderr,"GMAP version %s called with args:",PACKAGE_VERSION);
@@ -4717,7 +5035,6 @@ main (int argc, char *argv[]) {
}
fprintf(stderr,"\n");
-
while ((opt = getopt_long(argc,argv,
#ifdef PMAP
"q:D:a:d:k:Gg:2B:K:w:L:x:1t:s:c:H:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQYNI:i:l:",
@@ -4730,13 +5047,13 @@ main (int argc, char *argv[]) {
long_name = long_options[long_option_index].name;
if (!strcmp(long_name,"version")) {
print_program_version();
- exit(0);
+ return 1;
} else if (!strcmp(long_name,"check")) {
check_compiler_assumptions();
- exit(0);
+ return 1;
} else if (!strcmp(long_name,"help")) {
print_program_usage();
- exit(0);
+ return 1;
} else if (!strcmp(long_name,"expand-offsets")) {
if (!strcmp(optarg,"1")) {
@@ -4745,7 +5062,7 @@ main (int argc, char *argv[]) {
expand_offsets_p = false;
} else {
fprintf(stderr,"--expand-offsets flag must be 0 or 1\n");
- exit(9);
+ return 9;
}
} else if (!strcmp(long_name,"sampling")) {
@@ -4787,7 +5104,7 @@ main (int argc, char *argv[]) {
extraband_single = 3;
} else {
fprintf(stderr,"allow-close-indels argument %s not recognized. Only allow 0, 1, or 2. Run 'gsnap --help' for more information.\n",optarg);
- exit(9);
+ return 9;
}
} else if (!strcmp(long_name,"microexon-spliceprob")) {
microexon_spliceprob = check_valid_float(optarg,long_name);
@@ -4807,7 +5124,7 @@ main (int argc, char *argv[]) {
fprintf(stderr,"Canonical level %s not recognized.\n",optarg);
fprintf(stderr,"0=low reward for canonical introns, 1=high reward for canonical introns (default)\n");
fprintf(stderr,"2=low reward for high-identity seqs, high reward otherwise\n");
- exit(9);
+ return 9;
}
} else if (!strcmp(long_name,"cross-species")) {
@@ -4835,7 +5152,7 @@ main (int argc, char *argv[]) {
mode = ATOI_NONSTRANDED;
} else {
fprintf(stderr,"--mode must be standard, cmet-stranded, cmet-nonstranded, atoi-stranded, or atoi\n");
- exit(9);
+ return 9;
}
} else if (!strcmp(long_name,"min-trimmed-coverage")) {
@@ -4852,7 +5169,7 @@ main (int argc, char *argv[]) {
} else if (!strcmp(long_name,"failsonly")) {
if (nofailsp == true) {
fprintf(stderr,"Cannot specify both --nofails and --failsonly\n");
- exit(9);
+ return 9;
} else {
failsonlyp = true;
}
@@ -4865,12 +5182,12 @@ main (int argc, char *argv[]) {
} else if (!strcmp(long_name,"nofails")) {
if (failsonlyp == true) {
fprintf(stderr,"Cannot specify both --nofails and --failsonly\n");
- exit(9);
+ return 9;
} else {
nofailsp = true;
}
} else if (!strcmp(long_name,"split-output")) {
- sevenway_root = optarg;
+ split_output_root = optarg;
} else if (!strcmp(long_name,"append-output")) {
appendp = true;
} else if (!strcmp(long_name,"gff3-add-separators")) {
@@ -4880,8 +5197,9 @@ main (int argc, char *argv[]) {
gff3_separators_p = false;
} else {
fprintf(stderr,"--gff3-add-separators flag must be 0 or 1\n");
- exit(9);
+ return 9;
}
+
#ifndef PMAP
} else if (!strcmp(long_name,"no-sam-headers")) {
sam_headers_p = false;
@@ -4890,7 +5208,7 @@ main (int argc, char *argv[]) {
} else if (!strcmp(long_name,"quality-protocol")) {
if (user_quality_shift == true) {
fprintf(stderr,"Cannot specify both -j (--quality-print-shift) and --quality-protocol\n");
- exit(9);
+ return 9;
} else if (!strcmp(optarg,"illumina")) {
quality_shift = -31;
user_quality_shift = true;
@@ -4899,23 +5217,12 @@ main (int argc, char *argv[]) {
user_quality_shift = true;
} else {
fprintf(stderr,"The only values allowed for --quality-protocol are illumina or sanger\n");
- exit(9);
+ return 9;
}
} else if (!strcmp(long_name,"force-xs-dir")) {
force_xs_direction_p = true;
} else if (!strcmp(long_name,"md-lowercase-snp")) {
md_lowercase_variant_p = true;
- } else if (!strcmp(long_name,"action-if-cigar-error")) {
- if (!strcmp(optarg,"ignore")) {
- cigar_action = CIGAR_ACTION_IGNORE;
- } else if (!strcmp(optarg,"warning")) {
- cigar_action = CIGAR_ACTION_WARNING;
- } else if (!strcmp(optarg,"abort")) {
- cigar_action = CIGAR_ACTION_ABORT;
- } else {
- fprintf(stderr,"action-if-cigar-error needs to be ignore, warning, or abort\n");
- exit(9);
- }
} else if (!strcmp(long_name,"read-group-id")) {
sam_read_group_id = optarg;
} else if (!strcmp(long_name,"read-group-name")) {
@@ -4928,7 +5235,7 @@ main (int argc, char *argv[]) {
} else {
/* Shouldn't reach here */
fprintf(stderr,"Don't recognize option %s. For usage, run 'gsnap --help'",long_name);
- exit(9);
+ return 9;
}
break;
@@ -4941,7 +5248,7 @@ main (int argc, char *argv[]) {
#ifdef PMAP
case 'a':
if ((required_alphabet = Alphabet_find(optarg)) == AA0) {
- exit(9);
+ return 9;
}
break;
case 'k': required_index1part = atoi(check_valid_int(optarg)); break;
@@ -4950,13 +5257,13 @@ main (int argc, char *argv[]) {
required_index1part = atoi(check_valid_int(optarg));
if (required_index1part > 16) {
fprintf(stderr,"The value for k-mer size must be 16 or less\n");
- exit(9);
+ return 9;
}
break;
#endif
case 'G': uncompressedp = true; break;
case 'g': user_genomicseg = optarg; break;
- case '1': user_selfalign_p = true; break; /* was maponlyp = true */
+ case '1': user_selfalign_p = true; break;
case '2': user_pairalign_p = true; break;
case 'B':
@@ -4995,11 +5302,11 @@ main (int argc, char *argv[]) {
#else
fprintf(stderr,"Batch mode %s not recognized. Only allow 4-5, since mmap is disabled. Run 'gmap --help' for more information.\n",optarg);
#endif
- exit(9);
+ return 9;
}
break;
- case 'K': maxintronlen_bound = atoi(check_valid_int(optarg)); break;
+ case 'K': maxintronlen = atoi(check_valid_int(optarg)); break;
case 'w': shortsplicedist = strtoul(check_valid_int(optarg),NULL,10); break;
case 'L': maxtotallen_bound = atoi(check_valid_int(optarg)); break;
@@ -5038,7 +5345,7 @@ main (int argc, char *argv[]) {
case 3: prune_poor_p = true; prune_repetitive_p = true; break;
default: fprintf(stderr,"Prune level %s not recognized.\n",optarg);
fprintf(stderr,"0=no pruning, 1=poor seqs, 2=repetitive seqs, 3=both poor and repetitive seqs (default)\n");
- exit(9);
+ return 9;
}
break;
#endif
@@ -5048,8 +5355,8 @@ main (int argc, char *argv[]) {
case '0': exception_raise_p = false; break; /* Allows signals to pass through */
case '3': printtype = CONTINUOUS; break;
case '4': printtype = CONTINUOUS_BY_EXON; break;
- case '6': debug_graphic_p = true; diagnosticp = false; break;
- case '8': /* diagnosticp = true; */
+ case '6': debug_graphic_p = true; break;
+ case '8':
if (!strcmp(optarg,"stage1")) {
stage1debug = true;
} else if (!strcmp(optarg,"diag")) {
@@ -5084,13 +5391,13 @@ main (int argc, char *argv[]) {
stage3debug = POST_DISTAL_MEDIAL;
} else {
fprintf(stderr,"Allowed arguments for -8 flag are stage2, smoothing, singles, introns, hmm, dualbreaks, cycles, canonical, changepoint, distalmedial\n");
- exit(9);
+ return 9;
}
break;
- case '9': checkp = true; diagnosticp = true; break;
+ case '9': checkp = true; break;
case 'n':
- maxpaths = atoi(check_valid_int(optarg));
- if (maxpaths == 1) {
+ maxpaths_report = atoi(check_valid_int(optarg));
+ if (maxpaths_report == 1) {
fprintf(stderr,"Note: -n 1 will not report chimeric alignments. If you want a single alignment plus chimeras, use -n 0 instead.\n");
}
break;
@@ -5144,7 +5451,7 @@ main (int argc, char *argv[]) {
fprintf(stderr," map_exons (7)\n");
fprintf(stderr," map_ranges (8)\n");
fprintf(stderr," coords (9)\n");
- exit(9);
+ return 9;
}
break;
case 'Z': printtype = COMPRESSED; break;
@@ -5175,7 +5482,7 @@ main (int argc, char *argv[]) {
printtype = EXONS_GENOMIC;
} else {
fprintf(stderr,"Argument to -E flag must be either \"cdna\" or \"genomic\"\n");
- exit(9);
+ return 9;
}
break;
@@ -5206,14 +5513,14 @@ main (int argc, char *argv[]) {
sense_filter = 0;
} else {
fprintf(stderr,"direction %s not recognized. Must be sense_force, antisense_force, sense_filter, antisense_filter, or auto\n",optarg);
- exit(9);
+ return 9;
}
break;
case 'j':
if (user_quality_shift == true) {
fprintf(stderr,"Cannot specify both -j (--quality-print-shift) and --quality-protocol\n");
- exit(9);
+ return 9;
} else {
quality_shift = atoi(check_valid_int(optarg));
user_quality_shift = true;
@@ -5227,39 +5534,17 @@ main (int argc, char *argv[]) {
case 'i': user_ngap = atoi(check_valid_int(optarg)); break;
case 'l': wraplength = atoi(check_valid_int(optarg)); break;
- case '?': fprintf(stderr,"For usage, run 'gmap --help'\n"); exit(9);
- default: exit(9);
+ case '?': fprintf(stderr,"For usage, run 'gmap --help'\n"); return 9;
+ default: return 9;
}
}
- argc -= optind;
- argv += optind;
-
-
- check_compiler_assumptions();
-
- if (exception_raise_p == false) {
- fprintf(stderr,"Allowing signals and exceptions to pass through\n");
- Except_inactivate();
- } else {
-#ifdef HAVE_SIGACTION
- signal_action.sa_handler = signal_handler;
- signal_action.sa_flags = 0;
- sigfillset(&signal_action.sa_mask);
-
- sigaction(SIGFPE,&signal_action,NULL);
- sigaction(SIGSEGV,&signal_action,NULL);
- sigaction(SIGTRAP,&signal_action,NULL);
- sigaction(SIGUSR1,&signal_action,NULL);
-#endif
- }
-
if (printtype == SPLICESITES || printtype == INTRONS) {
- if (maxpaths > 1 || (sense_try != +1 && sense_filter != +1)) {
+ if (maxpaths_report > 1 || (sense_try != +1 && sense_filter != +1)) {
fprintf(stderr,"For splicesites or introns output, you should probably add flags '-n 1' and either '-z sense_force' or '-z sense_filter'.\n");
}
}
-
+
if (user_ngap >= 0) {
ngap = user_ngap;
} else if (printtype == EXONS_CDNA || printtype == EXONS_GENOMIC) {
@@ -5267,8 +5552,8 @@ main (int argc, char *argv[]) {
ngap = 0;
};
- if (maxintronlen_bound > maxtotallen_bound) {
- maxintronlen_bound = maxtotallen_bound;
+ if (maxintronlen > maxtotallen_bound) {
+ maxintronlen = maxtotallen_bound;
}
#ifdef HAVE_PTHREAD
@@ -5280,113 +5565,55 @@ main (int argc, char *argv[]) {
#endif
#endif
-
- /* Handle "?" command-line queries */
-
if (user_cmdline != NULL) {
- nchrs = 1;
+ part_modulus = 0;
+ part_interval = 1;
+ inbuffer_nspaces = 0;
+ nchromosomes = 1;
+ dbroot = (char *) NULL;
} else if (user_selfalign_p == true) {
- nchrs = 1;
+ nchromosomes = 1;
+ dbroot = (char *) NULL;
} else if (user_pairalign_p == true) {
- nchrs = 1;
+ nchromosomes = 1;
+ dbroot = (char *) NULL;
} else if (user_genomicseg != NULL) {
/* Ignore -D and -d flags */
- nchrs = 1;
+ nchromosomes = 1;
+ dbroot = (char *) NULL;
} else if (dbroot == NULL) {
fprintf(stderr,"Need to specify the -d, -g, -1, -2, or --cmdline flag\n");
print_program_usage();
- exit(9);
+ return 9;
} else if (!strcmp(dbroot,"?")) {
Datadir_avail_gmap_databases(stdout,user_genomedir);
- exit(0);
- } else {
- genomesubdir = Datadir_find_genomesubdir(&fileroot,&dbversion,user_genomedir,dbroot);
+ return 1;
+ }
- iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
- strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
- sprintf(iitfile,"%s/%s.chromosome.iit",genomesubdir,fileroot);
- if ((chromosome_iit = Univ_IIT_read(iitfile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
- fprintf(stderr,"IIT file %s is not valid\n",iitfile);
- exit(9);
-#ifdef LARGE_GENOMES
- } else if (Univ_IIT_coord_values_8p(chromosome_iit) == false) {
- fprintf(stderr,"This program gmapl is designed for large genomes.\n");
- fprintf(stderr,"For small genomes of less than 2^32 (4 billion) bp, please run gmap instead.\n");
- exit(9);
-#endif
- } else {
- nchrs = Univ_IIT_total_nintervals(chromosome_iit);
- circular_typeint = Univ_IIT_typeint(chromosome_iit,"circular");
- circularp = Univ_IIT_circularp(chromosome_iit);
+#ifndef PMAP
+ if (printtype == SAM) {
+ if (sam_read_group_id == NULL && sam_read_group_name != NULL) {
+ sam_read_group_id = sam_read_group_name;
+ } else if (sam_read_group_id != NULL && sam_read_group_name == NULL) {
+ sam_read_group_name = sam_read_group_id;
}
- genomelength = Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false);
-
- FREE(iitfile);
-
- if (map_iitfile == NULL) {
- /* Skip */
- } else if (!strcmp(map_iitfile,"?")) {
- Datadir_avail_maps(stdout,user_mapdir,genomesubdir,fileroot);
- exit(0);
- } else {
- mapdir = Datadir_find_mapdir(user_mapdir,genomesubdir,fileroot);
- iitfile = (char *) CALLOC(strlen(mapdir)+strlen("/")+
- strlen(map_iitfile)+strlen(".iit")+1,sizeof(char));
- sprintf(iitfile,"%s/%s.iit",mapdir,map_iitfile);
- if ((map_iit = IIT_read(iitfile,/*name*/map_iitfile,/*readonlyp*/true,/*divread*/READ_ALL,
- /*divstring*/NULL,/*add_iit_p*/true,/*labels_read_p*/true)) == NULL) {
- fprintf(stderr,"Map file %s.iit not found in %s. Available files:\n",map_iitfile,mapdir);
- Datadir_list_directory(stderr,mapdir);
- fprintf(stderr,"Either install file %s.iit or specify a directory for the IIT file\n",iitfile);
- fprintf(stderr,"using the -M flag.\n");
- exit(9);
- } else {
- map_divint_crosstable = Univ_IIT_divint_crosstable(chromosome_iit,map_iit);
- }
+ }
+#endif
- check_map_iit(map_iit,chromosome_iit);
+ return 0;
+}
- FREE(iitfile);
- FREE(mapdir);
- FREE(map_iitfile);
- }
- if (splicing_file != NULL) {
- if (user_splicingdir == NULL) {
- if ((splicing_iit = IIT_read(splicing_file,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
- /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true)) != NULL) {
- fprintf(stderr,"Reading splicing file %s locally...",splicing_file);
- } else {
- iitfile = (char *) CALLOC(strlen(user_splicingdir)+strlen("/")+strlen(splicing_file)+1,sizeof(char));
- sprintf(iitfile,"%s/%s",user_splicingdir,splicing_file);
- if ((splicing_iit = IIT_read(splicing_file,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
- /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true)) != NULL) {
- fprintf(stderr,"Reading splicing file %s locally...",splicing_file);
- FREE(iitfile);
- }
- }
- }
-
- if (splicing_iit == NULL) {
- mapdir = Datadir_find_mapdir(/*user_mapdir*/NULL,genomesubdir,fileroot);
- iitfile = (char *) CALLOC(strlen(mapdir)+strlen("/")+
- strlen(splicing_file)+1,sizeof(char));
- sprintf(iitfile,"%s/%s",mapdir,splicing_file);
- if ((splicing_iit = IIT_read(iitfile,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
- /*divstring*/NULL,/*add_iit_p*/true,/*labels_read_p*/true)) != NULL) {
- fprintf(stderr,"Reading splicing file %s...",iitfile);
- FREE(iitfile);
- FREE(mapdir);
- } else {
- fprintf(stderr,"Splicing file %s.iit not found locally or in %s. Available files:\n",splicing_file,mapdir);
- Datadir_list_directory(stderr,mapdir);
- fprintf(stderr,"Either install file %s or specify a full directory path\n",splicing_file);
- exit(9);
- }
- }
- }
- }
+static Inbuffer_T
+open_input_stream (int *nread, Sequence_T *usersegment, int argc, char **argv) {
+ Inbuffer_T inbuffer;
+ int nextchar = '\0';
+ FILE *input = NULL;
+ char **files;
+ int nfiles;
+ Request_T request;
+ char *p;
/* Read user segment before rest of sequences, because of shared usage of sequence.c */
if (user_cmdline != NULL) {
@@ -5398,20 +5625,20 @@ main (int argc, char *argv[]) {
fprintf(stderr,"--cmdline requires two strings separated by a comma");
exit(9);
} else {
- usersegment = Sequence_genomic_new(user_cmdline,(int) (p - user_cmdline),/*copyp*/true);
- if ((min_matches = Sequence_fulllength(usersegment)/2) > MIN_MATCHES) {
+ *usersegment = global_usersegment = Sequence_genomic_new(user_cmdline,(int) (p - user_cmdline),/*copyp*/true);
+ if ((min_matches = Sequence_fulllength(*usersegment)/2) > MIN_MATCHES) {
min_matches = MIN_MATCHES;
}
p++;
}
} else if (user_selfalign_p == true) {
- /* usersegment will be assigned to query sequence later */
+ /* usersegment will be assigned to query sequence below */
} else if (user_pairalign_p == true) {
/* Unfortunately, this procedure reads header of queryseq */
- usersegment = Sequence_read_unlimited(&nextchar,stdin);
- if ((min_matches = Sequence_fulllength(usersegment)/2) > MIN_MATCHES) {
+ *usersegment = Sequence_read_unlimited(&nextchar,stdin);
+ if ((min_matches = Sequence_fulllength(*usersegment)/2) > MIN_MATCHES) {
min_matches = MIN_MATCHES;
}
@@ -5420,14 +5647,14 @@ main (int argc, char *argv[]) {
fprintf(stderr,"Can't open file %s\n",user_genomicseg);
exit(9);
}
- if ((usersegment = Sequence_read_unlimited(&nextchar,input)) == NULL) {
+ if ((*usersegment = global_usersegment = Sequence_read_unlimited(&nextchar,input)) == NULL) {
fprintf(stderr,"File %s is empty\n",user_genomicseg);
exit(9);
} else {
- genomelength = (Univcoord_T) Sequence_fulllength(usersegment);
+ genomelength = (Univcoord_T) Sequence_fulllength(*usersegment);
}
- if ((min_matches = Sequence_fulllength(usersegment)/2) > MIN_MATCHES) {
+ if ((min_matches = Sequence_fulllength(*usersegment)/2) > MIN_MATCHES) {
min_matches = MIN_MATCHES;
}
fclose(input);
@@ -5436,29 +5663,11 @@ main (int argc, char *argv[]) {
min_matches = MIN_MATCHES;
}
- /* Read referencefile before rest of sequences, because of shared usage of sequence.c */
- if (referencefile != NULL) {
- if ((input = FOPEN_READ_TEXT(referencefile)) == NULL) {
- fprintf(stderr,"Can't open file %s\n",referencefile);
- exit(9);
- }
- if ((referenceseq = Sequence_read_unlimited(&nextchar,input)) == NULL) {
- fprintf(stderr,"File %s is empty\n",referencefile);
- exit(9);
- }
- fclose(input);
- }
-
-
-#ifdef MEMUSAGE
- Mem_usage_init();
- nworkers = 0;
- fprintf(stderr,"For memusage, setting to 0 threads\n");
-#endif
-
+ Inbuffer_setup(/*filter_if_both_p*/false,user_pairalign_p,global_usersegment,
+ part_modulus,part_interval);
if (user_cmdline != NULL) {
inbuffer = Inbuffer_cmdline(p,strlen(p));
- nread = 1;
+ *nread = 1;
} else if (user_selfalign_p == true) {
input = stdin;
@@ -5466,12 +5675,10 @@ main (int argc, char *argv[]) {
nfiles = 0;
/* Read in first batch of sequences */
- inbuffer = Inbuffer_new(nextchar,input,files,nfiles,maponlyp,
- inbuffer_nspaces,inbuffer_maxchars,part_interval,part_modulus,
- /*filter_if_both_p*/false);
- nread = Inbuffer_fill_init(inbuffer);
- request = Inbuffer_first_request(inbuffer);
- usersegment = Request_queryseq(request);
+ inbuffer = Inbuffer_new(nextchar,input,files,nfiles,inbuffer_nspaces);
+ *nread = Inbuffer_fill_init(inbuffer);
+ request = Inbuffer_first_request(inbuffer); /* Need usersegment, not the request itself */
+ *usersegment = Request_queryseq(request);
} else {
/* Open input stream and peek at first char */
@@ -5492,12 +5699,136 @@ main (int argc, char *argv[]) {
}
/* Read in first batch of sequences */
- inbuffer = Inbuffer_new(nextchar,input,files,nfiles,maponlyp,
- inbuffer_nspaces,inbuffer_maxchars,part_interval,part_modulus,
- /*filter_if_both_p*/false);
- nread = Inbuffer_fill_init(inbuffer);
+ inbuffer = Inbuffer_new(nextchar,input,files,nfiles,inbuffer_nspaces);
+#ifdef USE_MPI
+ *nread = 0;
+#else
+ *nread = Inbuffer_fill_init(inbuffer);
+#endif
+ }
+
+ return inbuffer;
+}
+
+
+int
+main (int argc, char *argv[]) {
+#ifdef USE_MPI
+ int nbeyond;
+#else
+ bool multiple_sequences_p = false;
+#endif
+ int cmdline_status;
+
+ char *genomesubdir = NULL, *snpsdir = NULL, *modedir = NULL, *mapdir = NULL, *iitfile = NULL, *fileroot = NULL;
+ int divno;
+ Univinterval_T interval;
+ Sequence_T usersegment = NULL;
+
+ bool showcontigp = true;
+ int nread;
+ double runtime;
+
+ Splicestringpool_T splicestringpool;
+
+#ifdef HAVE_PTHREAD
+ int ret, i;
+ pthread_attr_t thread_attr_join;
+#ifdef WORKER_DETACH
+ pthread_attr_t thread_attr_detach;
+#endif
+#endif
+
+#ifdef HAVE_SIGACTION
+ struct sigaction signal_action;
+#endif
+
+ extern int optind;
+
+#ifdef MEMUSAGE
+ Mem_usage_init();
+ Mem_usage_set_threadname("main");
+#endif
+
+
+#ifdef USE_MPI
+ MPI_Init(&argc,&argv);
+ MPI_Comm_rank(MPI_COMM_WORLD,&myid);
+ MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
+
+ if ((n_worker_procs = nprocs - 1) == 0) {
+ if (myid == 0) {
+ fprintf(stderr,"Need at least 2 processes for MPI version\n");
+ }
+ MPI_Finalize();
+ exit(0);
+
+ } else {
+ MPI_Debug_setup(myid);
+ }
+#endif
+
+ cmdline_status = parse_command_line(argc,argv,optind);
+ argc -= optind;
+ argv += optind;
+
+ if (cmdline_status == 0) {
+ /* okay to continue */
+ } else if (cmdline_status == 1) {
+ /* only information needed */
+#ifdef USE_MPI
+ MPI_Finalize();
+#endif
+ exit(0);
+ } else {
+#ifdef USE_MPI
+ MPI_Finalize();
+#endif
+ exit(cmdline_status);
+ }
+
+ check_compiler_assumptions();
+
+ if (exception_raise_p == false) {
+ fprintf(stderr,"Allowing signals and exceptions to pass through\n");
+ Except_inactivate();
+ } else {
+#ifdef HAVE_SIGACTION
+ signal_action.sa_handler = signal_handler;
+ signal_action.sa_flags = 0;
+ sigfillset(&signal_action.sa_mask); /* After first signal, block all other signals */
+
+ /* Note: SIGKILL and SIGSTOP cannot be caught */
+
+ sigaction(SIGFPE,&signal_action,NULL);
+ sigaction(SIGSEGV,&signal_action,NULL);
+ sigaction(SIGTRAP,&signal_action,NULL);
+ sigaction(SIGUSR1,&signal_action,NULL);
+ sigaction(SIGABRT,&signal_action,NULL); /* abnormal termination (abort) */
+ sigaction(SIGBUS,&signal_action,NULL); /* bus error */
+ sigaction(SIGFPE,&signal_action,NULL); /* arithmetic exception */
+ sigaction(SIGHUP,&signal_action,NULL); /* hangup */
+ sigaction(SIGILL,&signal_action,NULL); /* illegal hardware instruction */
+ sigaction(SIGINT,&signal_action,NULL); /* terminal interruption (control-C) */
+ sigaction(SIGPIPE,&signal_action,NULL); /* write to pipe with no readers */
+ sigaction(SIGQUIT,&signal_action,NULL); /* terminal quit (control-backslash) */
+ sigaction(SIGSEGV,&signal_action,NULL); /* invalid memory reference */
+ sigaction(SIGSYS,&signal_action,NULL); /* invalid system call */
+ sigaction(SIGTERM,&signal_action,NULL); /* Unix kill command */
+ sigaction(SIGTRAP,&signal_action,NULL); /* hardware fault */
+ sigaction(SIGXCPU,&signal_action,NULL); /* CPU limit exceeded */
+ sigaction(SIGXFSZ,&signal_action,NULL); /* file size limit exceeded */
+#endif
+ }
+
+#ifdef USE_MPI
+ if (myid > 0) {
+ inbuffer = open_input_stream(&nread,&usersegment,argc,argv);
}
+#else
+ inbuffer = open_input_stream(&nread,&usersegment,argc,argv);
+
if (nread > 1) {
multiple_sequences_p = true;
#ifdef HAVE_MMAP
@@ -5512,6 +5843,7 @@ main (int argc, char *argv[]) {
fprintf(stderr,"\n");
}
#endif
+
} else {
/* multiple_sequences_p = false; */
/* fprintf(stderr,"Note: only 1 sequence detected. Ignoring batch (-B) command\n"); */
@@ -5527,8 +5859,101 @@ main (int argc, char *argv[]) {
#endif
}
+#endif
+
+
+ if (dbroot != NULL) {
+ /* Prepare genomic data */
+ genomesubdir = Datadir_find_genomesubdir(&fileroot,&dbversion,user_genomedir,dbroot);
+
+ iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
+ strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
+ sprintf(iitfile,"%s/%s.chromosome.iit",genomesubdir,fileroot);
+ if ((chromosome_iit = Univ_IIT_read(iitfile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
+ fprintf(stderr,"IIT file %s is not valid\n",iitfile);
+ exit(9);
+#ifdef LARGE_GENOMES
+ } else if (Univ_IIT_coord_values_8p(chromosome_iit) == false) {
+ fprintf(stderr,"This program gmapl is designed for large genomes.\n");
+ fprintf(stderr,"For small genomes of less than 2^32 (4 billion) bp, please run gmap instead.\n");
+ exit(9);
+#endif
+ } else {
+ nchromosomes = Univ_IIT_total_nintervals(chromosome_iit);
+ circular_typeint = Univ_IIT_typeint(chromosome_iit,"circular");
+ circularp = Univ_IIT_circularp(&any_circular_p,chromosome_iit);
+ }
+ genomelength = Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false);
+
+ FREE(iitfile);
+ }
+
+#ifdef USE_MPI
+ /* Can prevent loading of files by rank 0 process */
+#endif
+
+ if (map_iitfile == NULL) {
+ /* Skip */
+ } else if (!strcmp(map_iitfile,"?")) {
+ Datadir_avail_maps(stdout,user_mapdir,genomesubdir,fileroot);
+ exit(0);
+ } else {
+ mapdir = Datadir_find_mapdir(user_mapdir,genomesubdir,fileroot);
+ iitfile = (char *) CALLOC(strlen(mapdir)+strlen("/")+
+ strlen(map_iitfile)+strlen(".iit")+1,sizeof(char));
+ sprintf(iitfile,"%s/%s.iit",mapdir,map_iitfile);
+ if ((map_iit = IIT_read(iitfile,/*name*/map_iitfile,/*readonlyp*/true,/*divread*/READ_ALL,
+ /*divstring*/NULL,/*add_iit_p*/true,/*labels_read_p*/true)) == NULL) {
+ fprintf(stderr,"Map file %s.iit not found in %s. Available files:\n",map_iitfile,mapdir);
+ Datadir_list_directory(stderr,mapdir);
+ fprintf(stderr,"Either install file %s.iit or specify a directory for the IIT file\n",iitfile);
+ fprintf(stderr,"using the -M flag.\n");
+ exit(9);
+ } else {
+ map_divint_crosstable = Univ_IIT_divint_crosstable(chromosome_iit,map_iit);
+ }
+
+ check_map_iit(map_iit,chromosome_iit);
+
+ FREE(iitfile);
+ FREE(mapdir);
+ FREE(map_iitfile);
+ }
+
+ if (splicing_file != NULL) {
+ if (user_splicingdir == NULL) {
+ if ((splicing_iit = IIT_read(splicing_file,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
+ /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true)) != NULL) {
+ fprintf(stderr,"Reading splicing file %s locally...",splicing_file);
+ } else {
+ iitfile = (char *) CALLOC(strlen(user_splicingdir)+strlen("/")+strlen(splicing_file)+1,sizeof(char));
+ sprintf(iitfile,"%s/%s",user_splicingdir,splicing_file);
+ if ((splicing_iit = IIT_read(splicing_file,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
+ /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true)) != NULL) {
+ fprintf(stderr,"Reading splicing file %s locally...",splicing_file);
+ FREE(iitfile);
+ }
+ }
+ }
- /* Prepare genomic data */
+ if (splicing_iit == NULL) {
+ mapdir = Datadir_find_mapdir(/*user_mapdir*/NULL,genomesubdir,fileroot);
+ iitfile = (char *) CALLOC(strlen(mapdir)+strlen("/")+
+ strlen(splicing_file)+1,sizeof(char));
+ sprintf(iitfile,"%s/%s",mapdir,splicing_file);
+ if ((splicing_iit = IIT_read(iitfile,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
+ /*divstring*/NULL,/*add_iit_p*/true,/*labels_read_p*/true)) != NULL) {
+ fprintf(stderr,"Reading splicing file %s...",iitfile);
+ FREE(iitfile);
+ FREE(mapdir);
+ } else {
+ fprintf(stderr,"Splicing file %s.iit not found locally or in %s. Available files:\n",splicing_file,mapdir);
+ Datadir_list_directory(stderr,mapdir);
+ fprintf(stderr,"Either install file %s or specify a full directory path\n",splicing_file);
+ exit(9);
+ }
+ }
+ }
/* Complement_init(); */
Dynprog_init(mode);
@@ -5538,7 +5963,7 @@ main (int argc, char *argv[]) {
if (user_pairalign_p == true) {
showcontigp = false;
- /* maxpaths = 1; -- no; could have different paths against the user segment. */
+ /* maxpaths_report = 1; -- no; could have different paths against the user segment. */
genomecomp = (Genome_T) NULL;
genomebits = (Genome_T) NULL;
@@ -5547,36 +5972,22 @@ main (int argc, char *argv[]) {
dbversion = (char *) NULL;
/* Do for each usersegment */
- } else if (usersegment != NULL) {
+ } else if (global_usersegment != NULL) {
/* Map against user-provided genomic segment */
showcontigp = false;
- /* maxpaths = 1; -- no; could have different paths against the user segment. */
+ /* maxpaths_report = 1; -- no; could have different paths against the user segment. */
genomecomp = (Genome_T) NULL;
genomebits = (Genome_T) NULL;
genomecomp_alt = (Genome_T) NULL;
genomebits_alt = (Genome_T) NULL;
dbversion = (char *) NULL;
- genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment));
- genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment));
+ genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(global_usersegment),Sequence_fulllength(global_usersegment));
+ genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(global_usersegment));
- if (userstage1p == true) {
-#ifdef PMAP
- indexdb_fwd = Indexdb_new_segment(Sequence_fullpointer(usersegment),
- alphabet_size,index1part_aa,/*watsonp*/true,index1interval);
- indexdb_rev = Indexdb_new_segment(Sequence_fullpointer(usersegment),
- alphabet_size,index1part_aa,/*watsonp*/false,index1interval);
-#elif defined(LARGE_GENOMES)
- fprintf(stderr,"If you are providing a genomic segment, please use gmap instead of gmapl\n");
- exit(9);
-#else
- indexdb_fwd = Indexdb_new_segment(Sequence_fullpointer(usersegment),index1part,index1interval);
- indexdb_rev = indexdb_fwd;
-#endif
- }
- if (Sequence_fulllength(usersegment) > 1000000) {
+ if (Sequence_fulllength(global_usersegment) > 1000000) {
fprintf(stderr,"Genomic sequence is unusually long (%d bp). GMAP handles genomes better when\n",
- Sequence_fulllength(usersegment));
+ Sequence_fulllength(global_usersegment));
fprintf(stderr," they are converted into gmap databases first using gmap_setup, and then accessed\n");
fprintf(stderr," with the -d flag.\n");
}
@@ -5596,28 +6007,28 @@ main (int argc, char *argv[]) {
}
genomecomp = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
genomebits = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
if (snps_root == NULL) {
genomecomp_alt = genomebits_alt = (Genome_T) NULL;
} else {
genomecomp_alt = Genome_new(genomesubdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
genomebits_alt = Genome_new(genomesubdir,fileroot,snps_root,/*genometype*/GENOME_BITS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
}
indexdb_fwd = Indexdb_new_genome(&index1part_aa,&index1interval,
genomesubdir,fileroot,FWD_FILESUFFIX,/*snps_root*/NULL,
&alphabet,&alphabet_size,required_alphabet,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access);
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp);
indexdb_rev = Indexdb_new_genome(&index1part_aa,&index1interval,
genomesubdir,fileroot,REV_FILESUFFIX,/*snps_root*/NULL,
&alphabet,&alphabet_size,required_alphabet,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access);
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp);
if (indexdb_fwd == NULL || indexdb_rev == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets or %s.%s*offsets.\n",
@@ -5654,10 +6065,10 @@ main (int argc, char *argv[]) {
}
genomecomp = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
genomecomp_blocks = Genome_blocks(genomecomp);
if ((genomebits = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
- uncompressedp,genome_access)) == NULL) {
+ uncompressedp,genome_access,sharedp)) == NULL) {
genomebits_blocks = (Genomecomp_T *) NULL;
} else {
genomebits_blocks = Genome_blocks(genomebits);
@@ -5675,7 +6086,8 @@ main (int argc, char *argv[]) {
if ((indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metct",/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -5683,7 +6095,8 @@ main (int argc, char *argv[]) {
if ((indexdb_rev = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metga",/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -5698,7 +6111,8 @@ main (int argc, char *argv[]) {
if ((indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -5706,7 +6120,8 @@ main (int argc, char *argv[]) {
if ((indexdb_rev = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -5716,7 +6131,8 @@ main (int argc, char *argv[]) {
if ((indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP\n",fileroot,IDX_FILESUFFIX);
exit(9);
}
@@ -5757,18 +6173,18 @@ main (int argc, char *argv[]) {
}
genomecomp = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
genomecomp_blocks = Genome_blocks(genomecomp);
if ((genomebits = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
- uncompressedp,genome_access)) == NULL) {
+ uncompressedp,genome_access,sharedp)) == NULL) {
genomebits_blocks = (Genomecomp_T *) NULL;
} else {
genomebits_blocks = Genome_blocks(genomebits);
}
genomecomp_alt = Genome_new(genomesubdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
genomebits_alt = Genome_new(genomesubdir,fileroot,snps_root,/*genometype*/GENOME_BITS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
if (user_cmetdir == NULL) {
@@ -5780,14 +6196,16 @@ main (int argc, char *argv[]) {
if ((indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metct",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
if ((indexdb_rev = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metga",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -5802,14 +6220,16 @@ main (int argc, char *argv[]) {
if ((indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
if ((indexdb_rev = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -5818,7 +6238,8 @@ main (int argc, char *argv[]) {
indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access);
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp);
if (indexdb_fwd == NULL) {
fprintf(stderr,"Cannot find snps index file for %s in directory %s\n",snps_root,snpsdir);
exit(9);
@@ -5916,7 +6337,6 @@ main (int argc, char *argv[]) {
fprintf(stderr,"done\n");
}
-
if (user_pairalign_p == true) {
/* Creation of genomebits/genomecomp and initialization done within single_thread() for each input sequence */
@@ -5958,7 +6378,7 @@ main (int argc, char *argv[]) {
}
Stage2_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,cross_species_p,
- suboptimal_score_start,suboptimal_score_end,mode,
+ suboptimal_score_start,suboptimal_score_end,sufflookback,nsufflookback,maxintronlen,mode,
/*snps_p*/genomecomp_alt ? true : false);
Dynprog_single_setup(homopolymerp);
Dynprog_genome_setup(novelsplicingp,splicing_iit,splicing_divint_crosstable,
@@ -5967,127 +6387,185 @@ main (int argc, char *argv[]) {
trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max);
Pair_setup(trim_mismatch_score,trim_indel_score,gff3_separators_p,sam_insert_0M_p,
force_xs_direction_p,md_lowercase_variant_p,
- /*snps_p*/genomecomp_alt ? true : false,genomelength,cigar_action);
+ /*snps_p*/genomecomp_alt ? true : false,
+ /*print_nsnpdiffs_p*/genomecomp_alt ? true : false,genomelength);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
require_splicedir_p,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,
splicesites,min_intronlength,max_deletionlength,/*min_indel_end_matches*/6,
- /*output_sam_p*/printtype == SAM ? true : false,
+ maxpeelback_distalmedial,nullgap,extramaterial_end,extramaterial_paired,
+ extraband_single,extraband_end,extraband_paired,
+ ngap,maxintronlen,/*output_sam_p*/printtype == SAM ? true : false,
homopolymerp,stage3debug);
Splicetrie_setup(splicesites,splicefrags_ref,splicefrags_alt,
trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max,
/*snpp*/false,amb_closest_p,/*amb_clip_p*/true,/*min_shortend*/2);
-
- /* Setup outbuffer */
-#ifndef PMAP
- if (printtype == SAM) {
- if (sam_read_group_id == NULL && sam_read_group_name != NULL) {
- sam_read_group_id = sam_read_group_name;
- } else if (sam_read_group_id != NULL && sam_read_group_name == NULL) {
- sam_read_group_name = sam_read_group_id;
- }
+ Output_setup(chromosome_iit,nofailsp,failsonlyp,quiet_if_excessive_p,maxpaths_report,
+ failedinput_root,quality_shift,
+ printtype,invertmode,wraplength,ngap,nointronlenp,sam_paired_p,cds_startpos,
+ fulllengthp,truncatep,strictp,checksump,genomecomp,usersegment,user_genomicseg,
+ dbversion,user_chrsubsetname,contig_iit,altstrain_iit,
+ /*chimeras_allowed_p*/chimera_margin > 0 ? true : false,
+ map_iit,map_divint_crosstable,map_exons_p,map_bothstrands_p,
+ nflanking,print_comment_p,sam_read_group_id);
+
+#ifdef USE_MPI
+ if (myid == 0) {
+ Outbuffer_setup(argc,argv,optind,chromosome_iit,any_circular_p,
+ nworkers,orderedp,quiet_if_excessive_p,
+ printtype,usersegment,sam_headers_p,sam_read_group_id,sam_read_group_name,
+ sam_read_group_library,sam_read_group_platform,
+ appendp,/*output_file*/NULL,split_output_root,failedinput_root);
+ outbuffer = Outbuffer_new(output_buffer_size,/*nread*/0);
+ /* Inbuffer_set_outbuffer(inbuffer,outbuffer); */
+
+ fprintf(stderr,"Starting alignment\n");
+ stopwatch = Stopwatch_new();
+ Stopwatch_start(stopwatch);
}
+#else
+ Outbuffer_setup(argc,argv,optind,chromosome_iit,any_circular_p,
+ nworkers,orderedp,quiet_if_excessive_p,
+ printtype,usersegment,sam_headers_p,sam_read_group_id,sam_read_group_name,
+ sam_read_group_library,sam_read_group_platform,
+ appendp,/*output_file*/NULL,split_output_root,failedinput_root);
+ outbuffer = Outbuffer_new(output_buffer_size,nread);
+ Inbuffer_set_outbuffer(inbuffer,outbuffer);
+
+ fprintf(stderr,"Starting alignment\n");
+ stopwatch = Stopwatch_new();
+ Stopwatch_start(stopwatch);
#endif
- outbuffer = Outbuffer_new(output_buffer_size,nread,sevenway_root,failedinput_root,appendp,
- /*chimeras_allowed_p*/chimera_margin > 0 ? true : false,
- user_genomicseg,usersegment,dbversion,genomecomp,chromosome_iit,
- user_chrsubsetname,contig_iit,altstrain_iit,map_iit,
- map_divint_crosstable,printtype,checksump,chimera_margin,
-#ifndef PMAP
- sam_headers_p,quality_shift,sam_paired_p,
- sam_read_group_id,sam_read_group_name,
- sam_read_group_library,sam_read_group_platform,
- nworkers,orderedp,
+
+#ifdef USE_MPI
+ /* MPI version */
+ if (myid == 0) {
+#ifdef WORKER_DETACH
+ pthread_attr_init(&thread_attr_detach);
+ if ((ret = pthread_attr_setdetachstate(&thread_attr_detach,PTHREAD_CREATE_DETACHED)) != 0) {
+ fprintf(stderr,"ERROR: pthread_attr_setdetachstate %d\n",ret);
+ exit(1);
+ }
#endif
- nofailsp,failsonlyp,maxpaths,quiet_if_excessive_p,
- map_exons_p,map_bothstrands_p,print_comment_p,nflanking,
- proteinmode,invertmode,nointronlenp,wraplength,
- ngap,cds_startpos,fulllengthp,truncatep,strictp,diagnosticp,maponlyp,
- stage1debug,diag_debug,debug_graphic_p,argc,argv,optind);
+ pthread_attr_init(&thread_attr_join);
+ if ((ret = pthread_attr_setdetachstate(&thread_attr_join,PTHREAD_CREATE_JOINABLE)) != 0) {
+ fprintf(stderr,"ERROR: pthread_attr_setdetachstate %d\n",ret);
+ exit(1);
+ }
- Inbuffer_set_outbuffer(inbuffer,outbuffer);
+ Except_init_pthread();
+ /* pthread_key_create(&global_request_key,NULL); */
+ if (orderedp == true) {
+ pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_ordered,
+ (void *) outbuffer);
+ } else {
+ pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_anyorder,
+ (void *) outbuffer);
+ }
- stopwatch = Stopwatch_new();
- Stopwatch_start(stopwatch);
+ Outbuffer_mpi_process(outbuffer,/*n_worker_procs*/nprocs - 1,part_modulus,part_interval);
+ pthread_join(output_thread_id,NULL);
- if (referenceseq != NULL) {
- fprintf(stderr,"Relative alignment currently not implemented\n");
- exit(9);
- chimera_margin = -1;
- /* align_relative(input,files,nfiles,nextchar,queryseq,referenceseq); */
- Sequence_free(&referenceseq);
+ /* pthread_key_delete(global_request_key); */
+ /* Except_term_pthread(); */
} else {
-#ifndef HAVE_PTHREAD
- single_thread();
-#else
- if (nworkers == 0) {
- single_thread();
+ worker_mpi_process(/*worker_id*/myid,inbuffer);
+ }
- } else if (multiple_sequences_p == false) {
- single_thread();
+#elif !defined(HAVE_PTHREAD)
+ /* Serial version */
+ single_thread();
- } else {
+#else
+ /* Pthreads version */
+ if (nworkers == 0) {
+ single_thread();
+
+ } else if (multiple_sequences_p == false) {
+ single_thread();
+
+ } else {
#ifdef WORKER_DETACH
- pthread_attr_init(&thread_attr_detach);
- if ((ret = pthread_attr_setdetachstate(&thread_attr_detach,PTHREAD_CREATE_DETACHED)) != 0) {
- fprintf(stderr,"ERROR: pthread_attr_setdetachstate %d\n",ret);
- exit(1);
- }
+ pthread_attr_init(&thread_attr_detach);
+ if ((ret = pthread_attr_setdetachstate(&thread_attr_detach,PTHREAD_CREATE_DETACHED)) != 0) {
+ fprintf(stderr,"ERROR: pthread_attr_setdetachstate %d\n",ret);
+ exit(1);
+ }
#endif
- pthread_attr_init(&thread_attr_join);
- if ((ret = pthread_attr_setdetachstate(&thread_attr_join,PTHREAD_CREATE_JOINABLE)) != 0) {
- fprintf(stderr,"ERROR: pthread_attr_setdetachstate %d\n",ret);
- exit(1);
- }
+ pthread_attr_init(&thread_attr_join);
+ if ((ret = pthread_attr_setdetachstate(&thread_attr_join,PTHREAD_CREATE_JOINABLE)) != 0) {
+ fprintf(stderr,"ERROR: pthread_attr_setdetachstate %d\n",ret);
+ exit(1);
+ }
- worker_thread_ids = (pthread_t *) CALLOC(nworkers,sizeof(pthread_t));
+ worker_thread_ids = (pthread_t *) CALLOC(nworkers,sizeof(pthread_t));
+ Except_init_pthread();
+ pthread_key_create(&global_request_key,NULL);
- Except_init_pthread();
+ if (orderedp == true) {
+ pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_ordered,
+ (void *) outbuffer);
+ } else {
+ pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_anyorder,
+ (void *) outbuffer);
+ }
- if (orderedp == true) {
- pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_ordered,
- (void *) outbuffer);
- } else {
- pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_anyorder,
- (void *) outbuffer);
- }
- for (i = 0; i < nworkers; i++) {
+ for (i = 0; i < nworkers; i++) {
#ifdef WORKER_DETACH
- pthread_create(&(worker_thread_ids[i]),&thread_attr_detach,worker_thread,(void *) NULL);
+ pthread_create(&(worker_thread_ids[i]),&thread_attr_detach,worker_thread,(void *) NULL);
#else
- /* Need to have worker threads finish before we call Inbuffer_free() */
- pthread_create(&(worker_thread_ids[i]),&thread_attr_join,worker_thread,(void *) NULL);
+ /* Need to have worker threads finish before we call Inbuffer_free() */
+ pthread_create(&(worker_thread_ids[i]),&thread_attr_join,worker_thread,(void *) NULL);
#endif
- }
+ }
- pthread_join(output_thread_id,NULL);
- for (i = 0; i < nworkers; i++) {
- pthread_join(worker_thread_ids[i],NULL);
- }
-
- /* Do not delete global_except_key, because worker threads might still need it */
- /* Except_term_pthread(); */
+ pthread_join(output_thread_id,NULL);
+ for (i = 0; i < nworkers; i++) {
+ pthread_join(worker_thread_ids[i],NULL);
+ }
- FREE(worker_thread_ids);
+ pthread_key_delete(global_request_key);
+ /* Do not delete global_except_key, because worker threads might still need it */
+ /* Except_term_pthread(); */
- }
+ FREE(worker_thread_ids);
+ }
#endif /* HAVE_PTHREAD */
+
+
+#ifdef USE_MPI
+ if (myid == 0) {
+ runtime = Stopwatch_stop(stopwatch);
+ Stopwatch_free(&stopwatch);
+
+ nread = Outbuffer_nread(outbuffer);
+ nbeyond = Outbuffer_nbeyond(outbuffer);
+ fprintf(stderr,"Processed %u queries in %.2f seconds (%.2f queries/sec)\n",
+ nread-nbeyond,runtime,(double) nread/runtime);
+
+ Outbuffer_free(&outbuffer);
+ Inbuffer_free(&inbuffer); /* Also closes inputs */
}
+#else
+ /* Single CPU or Pthreads version */
runtime = Stopwatch_stop(stopwatch);
Stopwatch_free(&stopwatch);
nread = Outbuffer_nread(outbuffer);
+ /* nbeyond = Outbuffer_nbeyond(outbuffer); */
fprintf(stderr,"Processed %u queries in %.2f seconds (%.2f queries/sec)\n",
nread,runtime,(double) nread/runtime);
Outbuffer_free(&outbuffer);
Inbuffer_free(&inbuffer); /* Also closes inputs */
+#endif
+
#ifdef PMAP
Backtranslation_term();
#endif
@@ -6174,6 +6652,15 @@ main (int argc, char *argv[]) {
Sequence_free(&usersegment);
}
+ Outbuffer_cleanup();
+
+ Access_controlled_cleanup();
+
+#ifdef USE_MPI
+ MPI_Barrier(MPI_COMM_WORLD); /* Make sure all processes have cleaned up */
+ MPI_Finalize();
+#endif
+
return 0;
}
@@ -6282,7 +6769,7 @@ Usage: gmap [OPTIONS...] <FASTA files...>, or\n\
",min_intronlength);
fprintf(stdout,"\
-K, --intronlength=INT Max length for one internal intron (default %d)\n\
-",maxintronlen_bound);
+",maxintronlen);
fprintf(stdout,"\
-w, --localsplicedist=INT Max length for known splice sites at ends of sequence\n\
(default %d)\n\
@@ -6434,7 +6921,7 @@ Output options\n\
will not report chimeric alignments, since those imply\n\
two paths. If you want a single alignment plus chimeric\n\
alignments, then set this to be 0.\n\
-",maxpaths);
+",maxpaths_report);
fprintf(stdout,"\
--suboptimal-score=INT Report only paths whose score is within this value of the\n\
best path. By default, if this option is not provided,\n\
@@ -6469,7 +6956,7 @@ Output options\n\
#ifdef PMAP
- fprintf(stdout,"\
+ fprintf(stdout,"\
-Y, --tolerant Translates genome with corrections for frameshifts\n\
");
#else
@@ -6482,8 +6969,9 @@ Output options\n\
");
#endif
- fprintf(stdout,"\n");
+ fprintf(stdout,"\n");
+#ifndef PMAP
fprintf(stdout,"Options for GFF3 output\n");
fprintf(stdout,"\
--gff3-add-separators=INT Whether to add a ### separator after each query sequence\n\
@@ -6491,7 +6979,6 @@ Output options\n\
");
fprintf(stdout,"\n");
-#ifndef PMAP
fprintf(stdout,"Options for SAM output\n");
fprintf(stdout,"\
--no-sam-headers Do not print headers beginning with '@'\n\
diff --git a/src/gmapindex.c b/src/gmapindex.c
index a54a691..ec0be93 100644
--- a/src/gmapindex.c
+++ b/src/gmapindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmapindex.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: gmapindex.c 167265 2015-06-11 00:04:50Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -81,7 +81,7 @@ typedef Tableuint_T Table_chrpos_T;
/* Program variables */
typedef enum {NONE, AUXFILES, GENOME, UNSHUFFLE, COUNT, OFFSETS, POSITIONS, SUFFIX_ARRAY, LCP_CHILD,
- ARRAY_UNCOMPRESS, CHILD_UNCOMPRESS} Action_T;
+ COMPRESSED_SUFFIX_ARRAY, ARRAY_UNCOMPRESS, CHILD_UNCOMPRESS} Action_T;
static Action_T action = NONE;
static char *sourcedir = ".";
static char *destdir = ".";
@@ -1044,6 +1044,7 @@ int
main (int argc, char *argv[]) {
int ncontigs;
Table_T accsegmentpos_table;
+ int shmid;
FILE *fp;
char *key, **keys, chrname[1024], chrname_alt[1024], Buffer[1024];
@@ -1060,7 +1061,11 @@ main (int argc, char *argv[]) {
char *chromosomefile, *iitfile, *positionsfile_high, *positionsfile_low, interval_char;
char *sarrayfile, *lcpexcfile, *lcpguidefile;
char *rankfile, *permuted_sarray_file; /* temporary files */
- char *childbytesfile, *childexcfile, *childguidefile;
+
+ /* For compressed suffix array */
+ char *csaptrfiles[5], *csacompfiles[5], *sasampleqfile, *sasamplesfile, *saindex0file;
+
+ char *childexcfile, *childguidefile;
char *lcpchilddcfile;
#ifdef USE_SEPARATE_BUCKETS
char *indexiptrsfile, *indexicompfile, *indexjptrsfile, *indexjcompfile;
@@ -1096,7 +1101,7 @@ main (int argc, char *argv[]) {
extern char *optarg;
char *string;
- while ((c = getopt(argc,argv,"F:D:d:z:k:q:ArlGUNHOPSLXYWw:e:Ss:n:m9")) != -1) {
+ while ((c = getopt(argc,argv,"F:D:d:z:k:q:ArlGUNHOPSLCXYWw:e:Ss:n:m9")) != -1) {
switch (c) {
case 'F': sourcedir = optarg; break;
case 'D': destdir = optarg; break;
@@ -1130,6 +1135,7 @@ main (int argc, char *argv[]) {
case 'P': action = POSITIONS; break;
case 'S': action = SUFFIX_ARRAY; break;
case 'L': action = LCP_CHILD; break;
+ case 'C': action = COMPRESSED_SUFFIX_ARRAY; break;
case 'X': action = ARRAY_UNCOMPRESS; break;
case 'Y': action = CHILD_UNCOMPRESS; break;
case 'W': writefilep = true; break;
@@ -1163,8 +1169,8 @@ main (int argc, char *argv[]) {
}
}
- argc -= (optind - 1);
- argv += (optind - 1);
+ argc -= optind;
+ argv += optind;
if (index1interval == 3) {
interval_char = '3';
@@ -1365,9 +1371,16 @@ main (int argc, char *argv[]) {
#endif
} else if (action == OFFSETS) {
- /* Usage: cat <genomefile> | gmapindex [-F <sourcedir>] [-D <destdir>] -d <dbname> -O
+ /* Usage: gmapindex [-F <sourcedir>] [-D <destdir>] -d <dbname> -O <genomefile>
Creates <destdir>/<dbname>.idxoffsets */
+ if (argc == 0) {
+ fp = stdin;
+ } else if ((fp = fopen(argv[0],"rb")) == NULL) {
+ fprintf(stderr,"Could not open file %s\n",argv[0]);
+ exit(9);
+ }
+
chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+
strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
sprintf(chromosomefile,"%s/%s.chromosome.iit",sourcedir,fileroot);
@@ -1384,7 +1397,7 @@ main (int argc, char *argv[]) {
}
fprintf(stderr,"\n");
- Indexdb_write_offsets(destdir,interval_char,stdin,chromosome_iit,
+ Indexdb_write_offsets(destdir,interval_char,fp,chromosome_iit,
index1part,index1interval,
genome_lc_p,fileroot,mask_lowercase_p,compression_types);
} else {
@@ -1394,18 +1407,29 @@ main (int argc, char *argv[]) {
}
fprintf(stderr,"\n");
- Indexdb_write_offsets_huge(destdir,interval_char,stdin,chromosome_iit,
+ Indexdb_write_offsets_huge(destdir,interval_char,fp,chromosome_iit,
index1part,index1interval,
genome_lc_p,fileroot,mask_lowercase_p,compression_types);
}
+ if (argc > 0) {
+ fclose(fp);
+ }
+
Univ_IIT_free(&chromosome_iit);
} else if (action == POSITIONS) {
- /* Usage: cat <genomefile> | gmapindex [-F <sourcedir>] [-D <destdir>] -d <dbname> -P
+ /* Usage: gmapindex [-F <sourcedir>] [-D <destdir>] -d <dbname> -P <genomefile>
Requires <sourcedir>/<dbname>.idxoffsets.
Creates <destdir>/<dbname>.idxpositions */
+ if (argc == 0) {
+ fp = stdin;
+ } else if ((fp = fopen(argv[0],"rb")) == NULL) {
+ fprintf(stderr,"Could not open file %s\n",argv[0]);
+ exit(9);
+ }
+
chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+
strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
sprintf(chromosomefile,"%s/%s.chromosome.iit",sourcedir,fileroot);
@@ -1413,6 +1437,7 @@ main (int argc, char *argv[]) {
fprintf(stderr,"IIT file %s is not valid\n",chromosomefile);
exit(9);
}
+ genomelength = Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias_p*/true);
FREE(chromosomefile);
filenames = Indexdb_get_filenames(&compression_type,&index1part,&index1interval,
@@ -1452,18 +1477,22 @@ main (int argc, char *argv[]) {
if (huge_offsets_p == false) {
Indexdb_write_positions(positionsfile_high,positionsfile_low,filenames->pointers_filename,
- filenames->offsets_filename,stdin,chromosome_iit,
- index1part,index1interval,
+ filenames->offsets_filename,fp,chromosome_iit,
+ index1part,index1interval,genomelength,
genome_lc_p,writefilep,fileroot,mask_lowercase_p,
compression_type,coord_values_8p);
} else {
Indexdb_write_positions_huge(positionsfile_high,positionsfile_low,filenames->pages_filename,filenames->pointers_filename,
- filenames->offsets_filename,stdin,chromosome_iit,
- index1part,index1interval,
+ filenames->offsets_filename,fp,chromosome_iit,
+ index1part,index1interval,genomelength,
genome_lc_p,writefilep,fileroot,mask_lowercase_p,
compression_type,coord_values_8p);
}
+ if (argc > 0) {
+ fclose(fp);
+ }
+
Filenames_free(&filenames);
FREE(positionsfile_high);
@@ -1496,7 +1525,7 @@ main (int argc, char *argv[]) {
sprintf(sarrayfile,"%s/%s.sarray",destdir,fileroot);
genomecomp = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
Sarray_write_array(sarrayfile,genomecomp,genomelength);
/* Bucket array */
@@ -1534,7 +1563,7 @@ main (int argc, char *argv[]) {
} else if (action == LCP_CHILD) {
/* Usage: gmapindex [-F <sourcedir>] [-D <destdir>] -d <dbname> -L
- Creates <destdir>/<dbname>.lcp and .saindex */
+ Creates <destdir>/<dbname>.lcp, .saindex, and .rank (needed by COMPRESSED_SUFFIX_ARRAY) */
chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+
strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
@@ -1562,7 +1591,7 @@ main (int argc, char *argv[]) {
/* Required for computing LCP, but uses non-SIMD instructions */
genomebits = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
Genome_hr_setup(Genome_blocks(genomebits),/*snp_blocks*/NULL,
/*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/false,
/*mode*/STANDARD);
@@ -1597,17 +1626,17 @@ main (int argc, char *argv[]) {
/* Assume we have lcp_bytes already in memory. Don't need to use guide for speed. */
lcpguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".salcpguide1024")+1,sizeof(char));
sprintf(lcpguidefile,"%s/%s.salcpguide1024",destdir,fileroot);
- lcp_guide = (UINT4 *) Access_allocated(&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4));
+ lcp_guide = (UINT4 *) Access_allocate(&shmid,&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4),/*sharedp*/false);
FREE(lcpguidefile);
lcpexcfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".salcpexc")+1,sizeof(char));
sprintf(lcpexcfile,"%s/%s.salcpexc",destdir,fileroot);
- lcp_exceptions = (UINT4 *) Access_allocated(&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4));
+ lcp_exceptions = (UINT4 *) Access_allocate(&shmid,&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4),/*sharedp*/false);
n_lcp_exceptions = lcpexc_len/(sizeof(UINT4) + sizeof(UINT4));
FREE(lcpexcfile);
genomecomp = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
/* Compute discriminating chars (DC) array */
discrim_chars = Sarray_discriminating_chars(&nbytes,sarrayfile,genomecomp,lcp_bytes,lcp_guide,
@@ -1642,8 +1671,90 @@ main (int argc, char *argv[]) {
FREE(lcp_bytes);
}
+ } else if (action == COMPRESSED_SUFFIX_ARRAY) {
+ /* Usage: gmapindex [-F <sourcedir>] [-D <destdir>] -d <dbname> -C
+ Creates <destdir>/<dbname>.lcp and .csa. Removes .sarray and .inverse_sarray */
+
+ chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+
+ strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
+ sprintf(chromosomefile,"%s/%s.chromosome.iit",sourcedir,fileroot);
+ if ((chromosome_iit = Univ_IIT_read(chromosomefile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
+ fprintf(stderr,"IIT file %s is not valid\n",chromosomefile);
+ exit(9);
+ }
+ FREE(chromosomefile);
+
+ genomelength = Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias_p*/true);
+ Univ_IIT_free(&chromosome_iit);
+
+ if (genomelength > 4294967295) {
+ /* Warning message already printed for SUFFIX_ARRAY */
+ /* fprintf(stderr,"Suffix arrays not yet supported for large genomes with more than 2^32 bp. Will use hash table only.\n"); */
+ } else {
+ fprintf(stderr,"Building compressed suffix array\n");
+
+ /* No need to mmap SA anymore */
+ csaptrfiles[0] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaAmeta")+1,sizeof(char));
+ sprintf(csaptrfiles[0],"%s/%s.csaAmeta",destdir,fileroot);
+ csaptrfiles[1] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaCmeta")+1,sizeof(char));
+ sprintf(csaptrfiles[1],"%s/%s.csaCmeta",destdir,fileroot);
+ csaptrfiles[2] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaGmeta")+1,sizeof(char));
+ sprintf(csaptrfiles[2],"%s/%s.csaGmeta",destdir,fileroot);
+ csaptrfiles[3] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaTmeta")+1,sizeof(char));
+ sprintf(csaptrfiles[3],"%s/%s.csaTmeta",destdir,fileroot);
+ csaptrfiles[4] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaXmeta")+1,sizeof(char));
+ sprintf(csaptrfiles[4],"%s/%s.csaXmeta",destdir,fileroot);
+
+ csacompfiles[0] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaAstrm")+1,sizeof(char));
+ sprintf(csacompfiles[0],"%s/%s.csaAstrm",destdir,fileroot);
+ csacompfiles[1] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaCstrm")+1,sizeof(char));
+ sprintf(csacompfiles[1],"%s/%s.csaCstrm",destdir,fileroot);
+ csacompfiles[2] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaGstrm")+1,sizeof(char));
+ sprintf(csacompfiles[2],"%s/%s.csaGstrm",destdir,fileroot);
+ csacompfiles[3] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaTstrm")+1,sizeof(char));
+ sprintf(csacompfiles[3],"%s/%s.csaTstrm",destdir,fileroot);
+ csacompfiles[4] = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csaXstrm")+1,sizeof(char));
+ sprintf(csacompfiles[4],"%s/%s.csaXstrm",destdir,fileroot);
+
+ sasampleqfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".sasampleq")+1,sizeof(char));
+ sprintf(sasampleqfile,"%s/%s.sasampleq",destdir,fileroot);
+ sasamplesfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".sasamples")+1,sizeof(char));
+ sprintf(sasamplesfile,"%s/%s.sasamples",destdir,fileroot);
+ saindex0file = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".saindex0")+1,sizeof(char));
+ sprintf(saindex0file,"%s/%s.saindex0",destdir,fileroot);
+
+
+#if 0
+ csafile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".csa")+1,sizeof(char));
+ sprintf(csafile,"%s/%s.csa",destdir,fileroot);
+#endif
+ sarrayfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".sarray")+1,sizeof(char));
+ sprintf(sarrayfile,"%s/%s.sarray",destdir,fileroot);
+ rankfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(".rank")+1,sizeof(char));
+ sprintf(rankfile,"%s/%s.rank",destdir,fileroot);
+
+ genomecomp = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
+ Sarray_write_csa(csaptrfiles,csacompfiles,sasampleqfile,sasamplesfile,saindex0file,
+ sarrayfile,rankfile,genomecomp,genomelength,CHARTABLE);
+ FREE(genomecomp);
+
+ remove(rankfile); /* Need to delete remove(rankfile) from Sarray_compute_lcp */
+ remove(sarrayfile);
+
+ FREE(rankfile);
+ FREE(sarrayfile);
+ FREE(saindex0file);
+ FREE(sasamplesfile);
+ FREE(sasampleqfile);
+ FREE(csacompfiles[3]); FREE(csaptrfiles[3]);
+ FREE(csacompfiles[2]); FREE(csaptrfiles[2]);
+ FREE(csacompfiles[1]); FREE(csaptrfiles[1]);
+ FREE(csacompfiles[0]); FREE(csaptrfiles[0]);
+ }
+
} else if (action == ARRAY_UNCOMPRESS) {
if (argc <= 2) {
start = end = 0;
@@ -1674,7 +1785,7 @@ main (int argc, char *argv[]) {
Univ_IIT_free(&chromosome_iit);
genomecomp = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
sarrayfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sarray")+1,sizeof(char));
sprintf(sarrayfile,"%s/%s%ssarray",destdir,fileroot,mode_prefix);
@@ -1689,23 +1800,23 @@ main (int argc, char *argv[]) {
lcpguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpguide1024")+1,sizeof(char));
sprintf(lcpguidefile,"%s/%s%ssalcpguide1024",destdir,fileroot,mode_prefix);
- lcp_guide = (UINT4 *) Access_allocated(&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4));
+ lcp_guide = (UINT4 *) Access_allocate(&shmid,&lcpguide_len,&seconds,lcpguidefile,sizeof(UINT4),/*sharedp*/false);
FREE(lcpguidefile);
lcpexcfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpexc")+1,sizeof(char));
sprintf(lcpexcfile,"%s/%s%ssalcpexc",destdir,fileroot,mode_prefix);
- lcp_exceptions = (UINT4 *) Access_allocated(&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4));
+ lcp_exceptions = (UINT4 *) Access_allocate(&shmid,&lcpexc_len,&seconds,lcpexcfile,sizeof(UINT4),/*sharedp*/false);
n_lcp_exceptions = lcpexc_len/(sizeof(UINT4) + sizeof(UINT4));
FREE(lcpexcfile);
childguidefile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sachildguide1024")+1,sizeof(char));
sprintf(childguidefile,"%s/%s%ssachildguide1024",destdir,fileroot,mode_prefix);
- child_guide = (UINT4 *) Access_allocated(&childguide_len,&seconds,childguidefile,sizeof(UINT4));
+ child_guide = (UINT4 *) Access_allocate(&shmid,&childguide_len,&seconds,childguidefile,sizeof(UINT4),/*sharedp*/false);
FREE(childguidefile);
childexcfile = (char *) CALLOC(strlen(destdir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sachildexc")+1,sizeof(char));
sprintf(childexcfile,"%s/%s%ssachildexc",destdir,fileroot,mode_prefix);
- child_exceptions = (UINT4 *) Access_allocated(&childexc_len,&seconds,childexcfile,sizeof(UINT4));
+ child_exceptions = (UINT4 *) Access_allocate(&shmid,&childexc_len,&seconds,childexcfile,sizeof(UINT4),/*sharedp*/false);
n_child_exceptions = childexc_len/(sizeof(UINT4) + sizeof(UINT4));
FREE(childexcfile);
diff --git a/src/goby.c b/src/goby.c
deleted file mode 100644
index 438c2c8..0000000
--- a/src/goby.c
+++ /dev/null
@@ -1,345 +0,0 @@
-static char rcsid[] = "$Id: goby.c 101822 2013-07-17 18:43:45Z twu $";
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include "goby.h"
-
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-#include <math.h>
-
-#include "assert.h"
-#include "mem.h"
-#include "chrnum.h"
-#include "substring.h"
-#include "samflags.h"
-#include "samprint.h"
-
-
-/* #define DEBUG */
-#ifdef DEBUG
-#define debug(x) x
-#else
-#define debug(x)
-#endif
-
-
-static bool show_refdiff_p;
-
-
-#ifdef HAVE_GOBY
-#include <goby/C_Reads.h>
-#include <goby/C_Alignments.h>
-#include <goby/C_CompactHelpers.h>
-
-struct Gobyreader_T {
- CReadsHelper *helper;
- bool complement_reads_p;
-};
-
-struct Gobywriter_T {
- CAlignmentsWriterHelper *helper;
-};
-
-#endif
-
-
-static char complCode[129] = "???????????????????????????????? ??#$%&')(*+,-./0123456789:;>=<??TVGHEFCDIJMLKNOPQYSAABWXRZ]?[^_`tvghefcdijmlknopqysaabwxrz}|{~?";
-
-
-/**
- * Duplicate a string with an optional length to copy. If the incoming
- * str is NULL, this will return null. If length is -1, the length
- * that is copied is strlen(str). The size of the returned buffer is
- * always length + 1 (to include the trailing '\0'). The caller is required
- * to FREE the string.
- * @param str the string to copy
- * @param the maximum length to copy or -1 for the whole string
- * @return the duplicate string.
- */
-static char *
-copy_string(char *str, int length) {
- int copy_length = length;
- char *new_str = (char *) NULL;
-
- if (str != NULL) {
- if (copy_length == -1) {
- copy_length = strlen(str);
- }
- new_str = (char *) CALLOC(copy_length + 1, sizeof(char));
- strncpy(new_str, str, copy_length);
- new_str[copy_length] = '\0';
- }
-
- return new_str;
-}
-
-void
-Goby_setup (bool show_refdiff_p_in) {
- show_refdiff_p = show_refdiff_p_in;
- return;
-}
-
-void
-Goby_shutdown () {
-#ifdef HAVE_GOBY
- goby_shutdownProtobuf();
-#endif
- return;
-}
-
-
-/************************************************************************
- * Reader
- ************************************************************************/
-
-
-Gobyreader_T
-Goby_reader_new (char **files, int nfiles, unsigned long window_start, unsigned long window_end, bool complement_reads_p) {
-#ifdef HAVE_GOBY
- Gobyreader_T new = (Gobyreader_T) MALLOC(sizeof(*new));
-
- new->complement_reads_p = complement_reads_p;
- fprintf(stderr,"Opening %s start=%lu, end=%lu\n",files[0], window_start, window_end);
- gobyReads_openReadsReaderWindowed(files,nfiles,/*circularp*/false,window_start,window_end,&new->helper);
- gobyReads_avoidZeroQuals(new->helper, 1);
- return new;
-#else
- return NULL;
-#endif
-}
-
-
-void
-Goby_reader_finish (Gobyreader_T reader) {
-#ifdef HAVE_GOBY
- gobyReads_finished(reader->helper);
-#endif
- return;
-}
-
-
-void
-Goby_reader_free (Gobyreader_T *old) {
- FREE(*old);
- return;
-}
-
-
-Shortread_T
-Goby_read (Shortread_T *queryseq2, Gobyreader_T reader, int barcode_length,
- bool invert_first_p, bool invert_second_p, bool skipp) {
-#ifdef HAVE_GOBY
- unsigned long goby_read_index;
- char *acc, *read_identifier = NULL, *description = NULL;
- char *sequence1, *quality1, *sequence2, *quality2;
- int sequence1_length, quality1_length, sequence2_length, quality2_length, acc_length;
- int i;
-
- sequence1_length = 0;
- while (sequence1_length == 0) {
- /* Ignore empty sequences */
- if (gobyReads_hasNext(reader->helper) != 1) {
- return (Shortread_T) NULL;
- }
- goby_read_index =
- gobyReads_nextSequencePair(reader->helper,&read_identifier,&description,
- &sequence1,&sequence1_length,
- &quality1,&quality1_length,
- &sequence2,&sequence2_length,
- &quality2,&quality2_length);
- if (sequence1_length != 0) {
- acc = (char *) CALLOC(25,sizeof(char));
- sprintf(acc, "%lu", goby_read_index);
- description = copy_string(description, -1);
- }
- }
-
- if (reader->complement_reads_p) {
- debug(
- if (sequence1_length > 0) {
- fprintf(stderr,"before complement, sequence1: %s\n", sequence1);
- }
- if (sequence2_length > 0) {
- fprintf(stderr,"before complement, sequence2: %s\n", sequence2);
- }
- );
- for (i = 0; i < sequence1_length; i++) {
- sequence1[i] = complCode[(int) sequence1[i]];
- }
- for (i = 0; i < sequence2_length; i++) {
- sequence2[i] = complCode[(int) sequence2[i]];
- }
- debug(
- if (sequence1_length > 0) {
- fprintf(stderr," after complement, sequence1: %s\n", sequence1);
- }
- if (sequence2_length > 0) {
- fprintf(stderr," after complement, sequence2: %s\n", sequence2);
- }
- );
- }
-
- *queryseq2 = Shortread_new(/*acc*/NULL,/*description*/NULL,/*filterp*/false,
- sequence2,sequence2_length,quality2,quality2_length,
- barcode_length,invert_second_p,/*copy_acc*/false,skipp);
-
- return Shortread_new(acc,description,/*filterp*/false,
- sequence1,sequence1_length,quality1,quality1_length,
- barcode_length,invert_first_p,/*copy_acc*/false,skipp);
-#else
- return (Shortread_T) NULL;
-#endif
-}
-
-
-/************************************************************************
- * Writer
- ************************************************************************/
-
-
-Gobywriter_T
-Goby_writer_new (char *output_root, char *aligner_name, char *aligner_version) {
-#ifdef HAVE_GOBY
- Gobywriter_T new = (Gobywriter_T) MALLOC(sizeof(*new));
-
- gobyAlignments_openAlignmentsWriterDefaultEntriesPerChunk(output_root,&new->helper);
- gobyAlignments_setAlignerName(new->helper,aligner_name);
- gobyAlignments_setAlignerVersion(new->helper,aligner_version);
- gobyGsnap_startAlignment(new->helper);
- gobyCapture_open(new->helper, 1);
- return new;
-#else
- return NULL;
-#endif
-}
-
-
-void
-Goby_writer_finish (Gobywriter_T writer, Gobyreader_T reader) {
-#ifdef HAVE_GOBY
- gobyAlignments_finished(writer->helper,reader->helper->numberOfReads);
-#endif
- return;
-}
-
-
-void
-Goby_writer_free (Gobywriter_T *old) {
-#ifdef HAVE_GOBY
- gobyCapture_close((*old)->helper);
-#endif
- FREE(*old);
- return;
-}
-
-
-void
-Goby_writer_add_chromosomes (Gobywriter_T writer, Univ_IIT_T chromosome_iit) {
-#ifdef HAVE_GOBY
- int nintervals, gsnap_target_index;
- char *gsnap_target_label;
- bool allocp;
- Univinterval_T interval;
- Chrpos_T length;
-
- nintervals = Univ_IIT_total_nintervals(chromosome_iit);
- for (gsnap_target_index = 1; gsnap_target_index <= nintervals; gsnap_target_index++) {
- gsnap_target_label = Univ_IIT_label(chromosome_iit,gsnap_target_index,&allocp);
- interval = Univ_IIT_interval(chromosome_iit,gsnap_target_index);
- length = Univinterval_length(interval);
- /* goby_target_index is 0-based, gsnap_target_index is 1-based. */
- gobyAlignments_addTarget(writer->helper,gsnap_target_index - 1,gsnap_target_label,length);
- debug(fprintf(stderr, "%u is %s\n", gsnap_target_index - 1, gsnap_target_label));
- if (allocp == true) {
- FREE(gsnap_target_label);
- }
- }
-#endif
- return;
-}
-
-
-void
-Goby_file_handles (FILE **fp_capture, FILE **fp_ignore, Gobywriter_T writer) {
-#ifdef HAVE_GOBY
- *fp_capture = gobyCapture_fileHandle(writer->helper);
- *fp_ignore = gobyCapture_ignoredFileHandle(writer->helper);
-#else
- *fp_capture = NULL;
- *fp_ignore = NULL;
-#endif
- return;
-}
-
-
-void
-Goby_observe_aligned(Gobywriter_T writer) {
-#ifdef HAVE_GOBY
- writer->helper->numberOfAlignedReads++;
-#endif /* HAVE_GOBY */
- return;
-}
-
-
-void
-Goby_start_capture (Gobywriter_T writer) {
-#ifdef HAVE_GOBY
- gobyCapture_startNew(writer->helper);
-#endif
-}
-
-
-void
-Goby_finish_capture (Gobywriter_T writer) {
-#ifdef HAVE_GOBY
- char *capturedData;
- char *ignoredData;
- gobyCapture_flush(writer->helper);
- capturedData = gobyCapture_capturedData(writer->helper);
- if (strlen(capturedData) > 0) {
- Goby_observe_aligned(writer);
- gobyGsnap_parse(writer->helper, capturedData);
- }
-#endif
- return;
-}
-
-
-void
-Goby_print_tmh (Gobywriter_T writer, Stage3end_T stage3, Shortread_T queryseq, int npaths) {
-#ifdef HAVE_GOBY
- unsigned long goby_read_index;
- UINT4 query_aligned_length;
-
- Goby_observe_aligned(writer);
-
- goby_read_index = (unsigned long) strtoul(Shortread_accession(queryseq), NULL, 10);
- query_aligned_length = Stage3end_query_alignment_length(stage3);
- gobyAlEntry_appendTooManyHits(writer->helper,goby_read_index,query_aligned_length,npaths);
-#endif /* HAVE_GOBY */
- return;
-}
-
-void
-Goby_print_pair_tmh (Gobywriter_T writer, Resulttype_T resulttype, Stage3pair_T stage3pair, Shortread_T queryseq, int npaths) {
-#ifdef HAVE_GOBY
- unsigned long goby_read_index;
- UINT4 query_aligned_length;
-
- Goby_observe_aligned(writer);
-
- /* TODO: Is this correct for both cases (PAIRED_MULT, CONCORDANT_MULT)? */
- /* TODO: Q: Should we be outputting BOTH primary and mate TMH? */
- goby_read_index = (unsigned long) strtoul(Shortread_accession(queryseq), NULL, 10);
- query_aligned_length = Stage3end_query_alignment_length(Stage3pair_hit5(stage3pair));
- gobyAlEntry_appendTooManyHits(writer->helper,goby_read_index,query_aligned_length,npaths);
-
- goby_read_index = (unsigned long) strtoul(Shortread_accession(queryseq), NULL, 10);
- query_aligned_length = Stage3end_query_alignment_length(Stage3pair_hit3(stage3pair));
- gobyAlEntry_appendTooManyHits(writer->helper,goby_read_index,query_aligned_length,npaths);
-#endif /* HAVE_GOBY */
- return;
-}
diff --git a/src/goby.h b/src/goby.h
deleted file mode 100644
index 43cee62..0000000
--- a/src/goby.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* $Id: goby.h 101822 2013-07-17 18:43:45Z twu $ */
-#ifndef GOBY_INCLUDED
-#define GOBY_INCLUDED
-
-#include <stdio.h>
-#include "bool.h"
-#include "iit-read-univ.h"
-#include "shortread.h"
-#include "stage3hr.h"
-
-typedef struct Gobyreader_T *Gobyreader_T;
-typedef struct Gobywriter_T *Gobywriter_T;
-
-extern void
-Goby_setup (bool show_refdiff_p_in);
-extern void
-Goby_shutdown ();
-
-extern Gobyreader_T
-Goby_reader_new (char **files, int nfiles, unsigned long window_start, unsigned long window_end, bool complement_reads_p);
-extern void
-Goby_reader_finish (Gobyreader_T reader);
-extern void
-Goby_reader_free (Gobyreader_T *old);
-extern Shortread_T
-Goby_read (Shortread_T *queryseq2, Gobyreader_T reader, int barcode_length,
- bool invert_first_p, bool invert_second_p, bool skipp);
-
-extern Gobywriter_T
-Goby_writer_new (char *output_root, char *aligner_name, char *aligner_version);
-extern void
-Goby_writer_finish (Gobywriter_T writer, Gobyreader_T reader);
-extern void
-Goby_writer_free (Gobywriter_T *old);
-extern void
-Goby_writer_add_chromosomes (Gobywriter_T writer, Univ_IIT_T chromosome_iit);
-extern void
-Goby_file_handles (FILE **fp_capture, FILE**fp_ignore, Gobywriter_T writer);
-extern void
-Goby_start_capture (Gobywriter_T writer);
-extern void
-Goby_finish_capture (Gobywriter_T writer);
-extern void
-Goby_print_tmh (Gobywriter_T writer, Stage3end_T stage3, Shortread_T queryseq1, int npaths);
-extern void
-Goby_print_pair_tmh (Gobywriter_T writer, Resulttype_T resulttype, Stage3pair_T stage3pair, Shortread_T queryseq1, int npaths);
-
-#endif
-
diff --git a/src/gregion.h b/src/gregion.h
index 6930aaa..8b54a0f 100644
--- a/src/gregion.h
+++ b/src/gregion.h
@@ -1,6 +1,7 @@
-/* $Id: gregion.h 145990 2014-08-25 21:47:32Z twu $ */
+/* $Id: gregion.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef GREGION_INCLUDED
#define GREGION_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "types.h"
diff --git a/src/gsnap.c b/src/gsnap.c
index a9d3690..f6931d6 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,8 +1,13 @@
-static char rcsid[] = "$Id: gsnap.c 158355 2015-02-10 19:08:45Z twu $";
+static char rcsid[] = "$Id: gsnap.c 166787 2015-06-02 18:00:56Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
+#ifdef USE_MPI
+#include <mpi.h>
+#include "mpidebug.h"
+#endif
+
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h> /* Needed to define pthread_t on Solaris */
#endif
@@ -63,7 +68,6 @@ static char rcsid[] = "$Id: gsnap.c 158355 2015-02-10 19:08:45Z twu $";
#include "mapq.h"
#include "substring.h"
#include "stage3hr.h"
-#include "goby.h"
#include "spanningelt.h"
#include "splicestringpool.h"
#include "splicetrie_build.h"
@@ -91,13 +95,18 @@ static char rcsid[] = "$Id: gsnap.c 158355 2015-02-10 19:08:45Z twu $";
#include "listdef.h"
#include "iit-read.h"
#include "datadir.h"
+#include "samprint.h" /* For SAM_setup */
+
+#include "filestring.h"
+#include "output.h"
#include "inbuffer.h"
#include "outbuffer.h"
-#include "samprint.h" /* For SAM_setup */
+#ifdef USE_MPI
+#include "master.h"
+#endif
#include "stage3.h" /* To get EXTRAQUERYGAP */
-#include "pair.h" /* For Cigar_action_T */
-
+#include "pair.h"
#include "getopt.h"
@@ -107,6 +116,21 @@ static char rcsid[] = "$Id: gsnap.c 158355 2015-02-10 19:08:45Z twu $";
#define MAX_GENOMICLENGTH_FOR_ALLOC 1000000
+/* MPI Processing */
+#ifdef DEBUGM
+#define debugm(x) x
+#else
+#define debugm(x)
+#endif
+
+/* File open/close. Want to turn on in shortread.c also. */
+#ifdef DEBUGF
+#define debugf(x) x
+#else
+#define debugf(x)
+#endif
+
+
#ifdef DEBUG
#define debug(x) x
#else
@@ -124,6 +148,13 @@ static int maxpeelback = 20; /* Now controlled by defect_rate */
static int maxpeelback_distalmedial = 24;
static int extramaterial_end = 10;
static int extramaterial_paired = 8;
+static int sufflookback = 60;
+static int nsufflookback = 5;
+static int extraband_single = 3;
+static int extraband_end = 3; /* Shouldn't differ from 0, since onesidegapp is true? */
+static int extraband_paired = 7;
+static int ngap = 3; /* 0? */
+
static int max_gmap_pairsearch = 50; /* Will perform GMAP on up to this many hits5 or hits3 */
static int max_gmap_terminal = 50; /* Will perform GMAP on up to this many terminals5 or terminals3 */
static int max_gmap_improvement = 5;
@@ -148,6 +179,7 @@ static Univ_IIT_T chromosome_iit = NULL;
static int circular_typeint = -1;
static int nchromosomes = 0;
static bool *circularp = NULL;
+static bool any_circular_p;
static Indexdb_T indexdb = NULL;
static Indexdb_T indexdb2 = NULL; /* For cmet or atoi */
static Genome_T genomecomp = NULL;
@@ -177,7 +209,6 @@ static char ATOI_REV_CHARTABLE[4] = {'A','C','G','C'}; /* TC */
static bool fastq_format_p = false;
static bool want_random_p = true; /* randomize among equivalent scores */
-static bool creads_format_p = false;
static Stopwatch_T stopwatch = NULL;
/************************************************************************
@@ -207,25 +238,33 @@ static bool chop_primers_p = false;
static bool query_unk_mismatch_p = false;
static bool genome_unk_mismatch_p = true;
static bool novelsplicingp = false;
+static bool find_dna_chimeras_p = false;
static int trim_mismatch_score = -3;
static int trim_indel_score = -2; /* was -4 */
-static Access_mode_T offsetsstrm_access = USE_ALLOCATE;
+static bool sharedp = true;
static bool expand_offsets_p = false;
/* Note: sarray aux files (like lcpchilddc) are always allocated */
#ifdef HAVE_MMAP
-static Access_mode_T positions_access = USE_MMAP_PRELOAD;
-static Access_mode_T genome_access = USE_MMAP_PRELOAD;
+/* Level 4 is now default */
+static Access_mode_T offsetsstrm_access = USE_ALLOCATE;
+static Access_mode_T positions_access = USE_ALLOCATE;
+static Access_mode_T genome_access = USE_ALLOCATE;
static Access_mode_T sarray_access = USE_MMAP_PRELOAD;
-static Access_mode_T aux_access = USE_MMAP_PRELOAD;
+static Access_mode_T lcp_access = USE_MMAP_PRELOAD;
+static Access_mode_T guideexc_access = USE_ALLOCATE;
+static Access_mode_T indexij_access = USE_ALLOCATE;
#else
+static Access_mode_T offsetsstrm_access = USE_ALLOCATE;
static Access_mode_T positions_access = USE_ALLOCATE;
static Access_mode_T genome_access = USE_ALLOCATE;
static Access_mode_T sarray_access = USE_ALLOCATE;
-static Access_mode_T aux_access = USE_ALLOCATE;
+static Access_mode_T lcp_access = USE_ALLOCATE;
+static Access_mode_T guideexc_access = USE_ALLOCATE;
+static Access_mode_T indexij_access = USE_ALLOCATE;
#endif
static int pairmax;
@@ -234,12 +273,23 @@ static int pairmax_rna = 200000;
static int expected_pairlength = 200;
static int pairlength_deviation = 100;
+#ifdef USE_MPI
+static int nranks, n_slave_ranks, myid, provided;
+static int exclude_ranks[1];
+static MPI_Comm workers_comm;
+static MPI_Group world_group, workers_group;
+static int nthreads0;
+#endif
+
#ifdef HAVE_PTHREAD
static pthread_t output_thread_id, *worker_thread_ids;
+#ifdef USE_MPI
+static pthread_t write_stdout_thread_id, parser_thread_id, mpi_interface_thread_id;
+#endif
static pthread_key_t global_request_key;
-static int nworkers = 1; /* (int) sysconf(_SC_NPROCESSORS_ONLN) */
+static int nthreads = 1; /* (int) sysconf(_SC_NPROCESSORS_ONLN) */
#else
-static int nworkers = 0; /* (int) sysconf(_SC_NPROCESSORS_ONLN) */
+static int nthreads = 0; /* (int) sysconf(_SC_NPROCESSORS_ONLN) */
#endif
/* static Masktype_T masktype = MASK_REPETITIVE; */
@@ -353,9 +403,8 @@ static int *runlength_divint_crosstable = NULL;
/* Output options */
static unsigned int output_buffer_size = 1000;
static bool output_sam_p = false;
-static bool output_goby_p = false;
-/* For Illumina, subtract 64. For Sanger, subtract 33. For Goby, subtract 0. */
+/* For Illumina, subtract 64. For Sanger, subtract 33. */
/* static int quality_score_adj = 64; -- Stored in mapq.c */
static bool user_quality_score_adj = false;
@@ -392,25 +441,16 @@ static char *sam_read_group_platform = NULL;
static bool force_xs_direction_p = false;
static bool md_lowercase_variant_p = false;
static bool hide_soft_clips_p = false;
-static Cigar_action_T cigar_action = CIGAR_ACTION_WARNING;
-
-/* Goby */
-static char *goby_output_root = NULL;
-static unsigned long creads_window_start = 0;
-static unsigned long creads_window_end = 0;
-static bool creads_complement_p = false;
-static Gobyreader_T gobyreader = NULL;
-static Gobywriter_T gobywriter = NULL;
/* Input/output */
-static char *sevenway_root = NULL;
+static char *split_output_root = NULL;
+static char *output_file = NULL;
static char *failedinput_root = NULL;
static bool appendp = false;
static Outbuffer_T outbuffer;
static Inbuffer_T inbuffer;
static unsigned int inbuffer_nspaces = 1000;
-static unsigned int inbuffer_maxchars = -1U; /* Currently not used by Inbuffer_T */
static bool timingp = false;
static bool unloadp = false;
@@ -429,7 +469,7 @@ static struct option long_options[] = {
{"sampling", required_argument, 0, 0}, /* required_index1interval, index1interval */
{"genomefull", no_argument, 0, 'G'}, /* uncompressedp */
{"part", required_argument, 0, 'q'}, /* part_modulus, part_interval */
- {"orientation", required_argument, 0, 'o'}, /* invert_first_p, invert_second_p */
+ {"orientation", required_argument, 0, 0}, /* invert_first_p, invert_second_p */
{"input-buffer-size", required_argument, 0, 0}, /* inbuffer_nspaces */
{"barcode-length", required_argument, 0, 0}, /* barcode_length */
{"fastq-id-start", required_argument, 0, 0}, /* acc_fieldi_start */
@@ -447,6 +487,7 @@ static struct option long_options[] = {
#endif
/* Compute options */
+ {"use-shared-memory", required_argument, 0, 0}, /* sharedp */
#ifdef HAVE_MMAP
{"batch", required_argument, 0, 'B'}, /* offsetsstrm_access, positions_access, genome_access */
#endif
@@ -456,7 +497,7 @@ static struct option long_options[] = {
{"pairexpect", required_argument, 0, 0}, /* expected_pairlength */
{"pairdev", required_argument, 0, 0}, /* pairlength_deviation */
- {"nthreads", required_argument, 0, 't'}, /* nworkers */
+ {"nthreads", required_argument, 0, 't'}, /* nthreads */
{"adapter-strip", required_argument, 0, 'a'}, /* chop_primers_p */
{"query-unk-mismatch", required_argument, 0, 0}, /* query_unk_mismatch_p */
@@ -465,6 +506,7 @@ static struct option long_options[] = {
{"trim-mismatch-score", required_argument, 0, 0}, /* trim_mismatch_score */
{"trim-indel-score", required_argument, 0, 0}, /* trim_indel_score */
{"novelsplicing", required_argument, 0, 'N'}, /* novelsplicingp */
+ {"find-dna-chimeras", required_argument, 0, 0}, /* find_dna_chimeras */
{"max-mismatches", required_argument, 0, 'm'}, /* user_maxlevel_float */
{"terminal-threshold", required_argument, 0, 0}, /* terminal_threshold */
@@ -488,7 +530,7 @@ static struct option long_options[] = {
{"localsplicedist", required_argument, 0, 'w'}, /* shortsplicedist */
{"novelend-splicedist", required_argument, 0, 0}, /* shortsplicedist_novelend */
{"splicingdir", required_argument, 0, 0}, /* user_splicingdir */
- {"use-splicing", required_argument, 0, 's'}, /* splicing_iit, knownsplicingp */
+ {"use-splicing", required_argument, 0, 's'}, /* splicing_iit, knownsplicingp, find_dna_chimeras_p */
{"ambig-splice-noclip", no_argument, 0, 0}, /* amb_clip_p */
{"genes", required_argument, 0, 'g'}, /* genes_iit */
{"favor-multiexon", no_argument, 0, 0}, /* favor_multiexon_p */
@@ -527,7 +569,7 @@ static struct option long_options[] = {
/* Output options */
{"output-buffer-size", required_argument, 0, 0}, /* output_buffer_size */
- {"format", required_argument, 0, 'A'}, /* output_sam_p, output_goby_p, print_m8_p */
+ {"format", required_argument, 0, 'A'}, /* output_sam_p, print_m8_p */
{"quality-protocol", required_argument, 0, 0}, /* quality_score_adj, quality_shift */
{"quality-zero-score", required_argument, 0, 'J'}, /* quality_score_adj */
@@ -543,7 +585,6 @@ static struct option long_options[] = {
{"force-xs-dir", no_argument, 0, 0}, /* force_xs_direction_p */
{"md-lowercase-snp", no_argument, 0, 0}, /* md_lowercase_variant_p */
{"extend-soft-clips", no_argument, 0, 0}, /* hide_soft_clips_p */
- {"action-if-cigar-error", required_argument, 0, 0}, /* cigar_action */
{"noexceptions", no_argument, 0, '0'}, /* exception_raise_p */
{"maxsearch", required_argument, 0, 0}, /* maxpaths_search */
@@ -556,20 +597,13 @@ static struct option long_options[] = {
{"print-snps", no_argument, 0, 0}, /* print_snplabels_p */
{"failsonly", no_argument, 0, 0}, /* failsonlyp */
{"nofails", no_argument, 0, 0}, /* nofailsp */
- {"split-output", required_argument, 0, 0}, /* sevenway_root */
+ {"output-file", required_argument, 0, 'o'}, /* output_file */
+ {"split-output", required_argument, 0, 0}, /* split_output_root */
{"failed-input", required_argument, 0, 0}, /* failed_input_root */
{"append-output", no_argument, 0, 0}, /* appendp */
{"order-among-best", required_argument, 0, 0}, /* want_random_p */
-#ifdef HAVE_GOBY
- /* Goby-specific options */
- {"goby-output", required_argument, 0, 0}, /* goby_output_root */
- {"creads-window-start", required_argument, 0, 0}, /* creads_window_start */
- {"creads-window-end", required_argument, 0, 0}, /* creads_window_end */
- {"creads-complement", no_argument, 0, 0}, /* creads_complement_p */
-#endif
-
/* Diagnostic options */
{"time", no_argument, 0, 0}, /* timingp */
{"unload", no_argument, 0, 0}, /* unloadp */
@@ -791,12 +825,15 @@ check_compiler_assumptions () {
/************************************************************************/
-static Result_T
-process_request (Request_T request, Floors_T *floors_array,
+static Filestring_T
+process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+ double *worker_runtime, Request_T request, Floors_T *floors_array,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Stopwatch_T worker_stopwatch) {
+ Filestring_T fp;
+ Result_T result;
int jobid;
Shortread_T queryseq1, queryseq2;
Stage3end_T *stage3array, *stage3array5, *stage3array3;
@@ -805,7 +842,6 @@ process_request (Request_T request, Floors_T *floors_array,
int npaths, npaths5, npaths3, i;
int first_absmq, second_absmq, first_absmq5, second_absmq5, first_absmq3, second_absmq3;
Pairtype_T final_pairtype;
- double worker_runtime;
jobid = Request_id(request);
queryseq1 = Request_queryseq1(request);
@@ -824,7 +860,7 @@ process_request (Request_T request, Floors_T *floors_array,
if (queryseq2 == NULL) {
stage3array = Stage1_single_read(&npaths,&first_absmq,&second_absmq,
queryseq1,indexdb,indexdb2,indexdb_size_threshold,
- genomecomp,floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -832,14 +868,17 @@ process_request (Request_T request, Floors_T *floors_array,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
/*keep_floors_p*/true);
- worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
- return Result_single_read_new(jobid,(void **) stage3array,npaths,first_absmq,second_absmq,worker_runtime);
+ result = Result_single_read_new(jobid,(void **) stage3array,npaths,first_absmq,second_absmq);
+ fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+ *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
+ Result_free(&result);
+ return fp;
} else if ((stage3pairarray = Stage1_paired_read(&npaths,&first_absmq,&second_absmq,&final_pairtype,
&stage3array5,&npaths5,&first_absmq5,&second_absmq5,
&stage3array3,&npaths3,&first_absmq3,&second_absmq3,
queryseq1,queryseq2,indexdb,indexdb2,indexdb_size_threshold,
- genomecomp,floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -847,16 +886,22 @@ process_request (Request_T request, Floors_T *floors_array,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
pairmax,/*keep_floors_p*/true)) != NULL) {
/* Paired or concordant hits found */
- worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
- return Result_paired_read_new(jobid,(void **) stage3pairarray,npaths,first_absmq,second_absmq,
- final_pairtype,worker_runtime);
+ result = Result_paired_read_new(jobid,(void **) stage3pairarray,npaths,first_absmq,second_absmq,
+ final_pairtype);
+ fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+ *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
+ Result_free(&result);
+ return fp;
} else if (chop_primers_p == false || Shortread_chop_primers(queryseq1,queryseq2) == false) {
/* No paired or concordant hits found, and no adapters found */
/* Report ends as unpaired */
- worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
- return Result_paired_as_singles_new(jobid,(void **) stage3array5,npaths5,first_absmq5,second_absmq5,
- (void **) stage3array3,npaths3,first_absmq3,second_absmq3,worker_runtime);
+ result = Result_paired_as_singles_new(jobid,(void **) stage3array5,npaths5,first_absmq5,second_absmq5,
+ (void **) stage3array3,npaths3,first_absmq3,second_absmq3);
+ fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+ *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
+ Result_free(&result);
+ return fp;
} else {
/* Try with potential primers chopped. queryseq1 and queryseq2 altered by Shortread_chop_primers. */
@@ -874,7 +919,7 @@ process_request (Request_T request, Floors_T *floors_array,
&stage3array5,&npaths5,&first_absmq5,&second_absmq5,
&stage3array3,&npaths3,&first_absmq3,&second_absmq3,
queryseq1,queryseq2,indexdb,indexdb2,indexdb_size_threshold,
- genomecomp,floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -882,16 +927,19 @@ process_request (Request_T request, Floors_T *floors_array,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
pairmax,/*keep_floors_p*/false)) != NULL) {
/* Paired or concordant hits found, after chopping adapters */
- worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
- return Result_paired_read_new(jobid,(void **) stage3pairarray,npaths,first_absmq,second_absmq,
- final_pairtype,worker_runtime);
+ result = Result_paired_read_new(jobid,(void **) stage3pairarray,npaths,first_absmq,second_absmq,
+ final_pairtype);
} else {
/* No paired or concordant hits found, after chopping adapters */
- worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
- return Result_paired_as_singles_new(jobid,(void **) stage3array5,npaths5,first_absmq5,second_absmq5,
- (void **) stage3array3,npaths3,first_absmq3,second_absmq3,worker_runtime);
+ result = Result_paired_as_singles_new(jobid,(void **) stage3array5,npaths5,first_absmq5,second_absmq5,
+ (void **) stage3array3,npaths3,first_absmq3,second_absmq3);
}
+
+ fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+ *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
+ Result_free(&result);
+ return fp;
}
}
@@ -903,71 +951,67 @@ static const Except_T sigsegv_error = {"SIGSEGV--segmentation violation"};
static const Except_T sigtrap_error = {"SIGTRAP--hardware fault"};
static const Except_T misc_signal_error = {"Miscellaneous signal"};
-#if 0
-static void
-signal_handler_old (int sig) {
- if (sig == SIGUSR1) {
-#ifdef HAVE_PTHREAD
- pthread_exit(NULL);
-#else
- exit(9);
-#endif
- } else if (sig == SIGFPE) {
- Except_raise(&sigfpe_error,__FILE__,__LINE__);
- } else if (sig == SIGSEGV) {
- Except_raise(&sigsegv_error,__FILE__,__LINE__);
- } else if (sig == SIGTRAP) {
- Except_raise(&sigtrap_error,__FILE__,__LINE__);
- } else {
- fprintf(stderr,"Signal %d\n",sig);
- Except_raise(&misc_signal_error,__FILE__,__LINE__);
- }
- return;
-}
-#endif
-
static void
signal_handler (int sig) {
Request_T request;
Shortread_T queryseq1, queryseq2;
- if (sig == SIGFPE) {
- fprintf(stderr,"Signal received: Floating point error\n");
- } else if (sig == SIGSEGV) {
- fprintf(stderr,"Signal received: Segmentation fault\n");
- } else if (sig == SIGTRAP) {
- fprintf(stderr,"Signal received: Trap\n");
- } else {
- fprintf(stderr,"Signal received: %d\n",sig);
+ switch (sig) {
+ case SIGABRT: fprintf(stderr,"Signal received: SIGABRT\n"); break;
+ case SIGFPE: fprintf(stderr,"Signal received: SIGFPE\n"); break;
+ case SIGHUP: fprintf(stderr,"Signal received: SIGHUP\n"); break;
+ case SIGILL:
+ fprintf(stderr,"Signal received: SIGILL\n");
+ fprintf(stderr,"An illegal instruction means that this program is being run on a computer\n");
+ fprintf(stderr," with different features than the computer used to compile the program\n");
+ fprintf(stderr,"You may need to re-compile the program with fewer features by doing something like\n");
+ fprintf(stderr," ./configure --disable-simd\n");
+ break;
+ case SIGINT: fprintf(stderr,"Signal received: SIGINT\n"); break;
+ case SIGPIPE: fprintf(stderr,"Signal received: SIGPIPE\n"); break;
+ case SIGQUIT: fprintf(stderr,"Signal received: SIGQUIT\n"); break;
+ case SIGSEGV: fprintf(stderr,"Signal received: SIGSEGV\n"); break;
+ case SIGSYS: fprintf(stderr,"Signal received: SIGSYS\n"); break;
+ case SIGTERM: fprintf(stderr,"Signal received: SIGTERM\n"); break;
+ case SIGTRAP: fprintf(stderr,"Signal received: SIGTRAP\n"); break;
+ case SIGXCPU: fprintf(stderr,"Signal received: SIGXCPU\n"); break;
+ case SIGXFSZ: fprintf(stderr,"Signal received: SIGXFSZ\n"); break;
}
+ Access_emergency_cleanup();
+
+#if 0
+ /* Appears to hang */
+#ifdef USE_MPI
+ MPI_Barrier(MPI_COMM_WORLD);
+#endif
+#endif
#ifdef HAVE_PTHREAD
request = (Request_T) pthread_getspecific(global_request_key);
if (request == NULL) {
- fprintf(stderr,"Unable to retrieve request for thread\n");
+ /* fprintf(stderr,"Unable to retrieve request for thread\n"); */
} else {
queryseq1 = Request_queryseq1(request);
queryseq2 = Request_queryseq2(request);
if (queryseq1 == NULL) {
fprintf(stderr,"Unable to retrieve queryseq for request\n");
} else {
- fprintf(stderr,"Problem sequence: ");
- fprintf(stderr,"%s (%d bp)\n",Shortread_accession(queryseq1),Shortread_fulllength(queryseq1));
- if (queryseq2 == NULL) {
- Shortread_print_query_singleend_fasta(stderr,queryseq1,/*headerseq*/queryseq1);
- } else {
- Shortread_print_query_pairedend_fasta(stderr,queryseq1,queryseq2,
- invert_first_p,invert_second_p);
+ fprintf(stderr,"Problem sequence: ");
+ fprintf(stderr,"%s (%d bp)\n",Shortread_accession(queryseq1),Shortread_fulllength(queryseq1));
+ if (queryseq2 == NULL) {
+ Shortread_stderr_query_singleend_fasta(queryseq1,/*headerseq*/queryseq1);
+ } else {
+ Shortread_stderr_query_pairedend_fasta(queryseq1,queryseq2,invert_first_p,invert_second_p);
}
}
}
#endif
- abort();
+ exit(9);
+
return;
}
-
#endif
@@ -979,10 +1023,9 @@ static void
single_thread () {
Floors_T *floors_array;
Request_T request;
- Result_T result;
+ Filestring_T fp, fp_failedinput_1, fp_failedinput_2;
Shortread_T queryseq1;
int i;
- int noutput = 0;
Stopwatch_T worker_stopwatch;
/* For GMAP */
@@ -992,10 +1035,11 @@ single_thread () {
Diagpool_T diagpool;
Cellpool_T cellpool;
int jobid = 0;
+ double worker_runtime;
#ifdef MEMUSAGE
- long int memusage_constant = 0;
- char *comma1, *comma2, *comma3, *comma4, *comma5;
+ long int memusage_constant = 0, memusage;
+ char acc[100+1], comma0[20], comma1[20], comma2[20], comma3[20], comma4[20], comma5[20];
#endif
oligoindices_major = Oligoindex_array_new_major(MAX_QUERYLENGTH_FOR_ALLOC,MAX_GENOMICLENGTH_FOR_ALLOC);
@@ -1012,20 +1056,37 @@ single_thread () {
worker_stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL;
floors_array = (Floors_T *) CALLOC(MAX_READLENGTH+1,sizeof(Floors_T));
+
/* Except_stack_create(); -- requires pthreads */
#ifdef MEMUSAGE
memusage_constant += Mem_usage_report_std_heap();
+ Genomicpos_commafmt_fill(comma0,memusage_constant);
Mem_usage_reset_heap_baseline(0);
- printf("Initial memusage of single thread: %ld\n",Mem_usage_report_std_heap());
#endif
while ((request = Inbuffer_get_request(inbuffer)) != NULL) {
+#ifdef USE_MPI
+ debug(printf("rank %d, ",myid));
+#endif
debug(printf("single_thread got request %d\n",Request_id(request)));
+#ifdef MEMUSAGE
+ queryseq1 = Request_queryseq1(request);
+ fprintf(stderr,"Single thread starting %s\n",Shortread_accession(queryseq1));
+ Mem_usage_reset_stack_max();
+ Mem_usage_reset_heap_max();
+#endif
+
TRY
- result = process_request(request,floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,worker_stopwatch);
+ fp = process_request(&fp_failedinput_1,&fp_failedinput_2,&worker_runtime,
+ request,floors_array,oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,worker_stopwatch);
+ if (timingp == true) {
+ queryseq1 = Request_queryseq1(request);
+ printf("%s\t%.6f\n",Shortread_accession(queryseq1),worker_runtime);
+ }
+
ELSE
queryseq1 = Request_queryseq1(request);
if (queryseq1 == NULL) {
@@ -1038,10 +1099,10 @@ single_thread () {
}
fprintf(stderr,"\n");
if (Request_queryseq2(request) == NULL) {
- Shortread_print_query_singleend_fasta(stderr,queryseq1,/*headerseq*/queryseq1);
+ Shortread_stderr_query_singleend_fasta(queryseq1,/*headerseq*/queryseq1);
} else {
- Shortread_print_query_pairedend_fasta(stderr,queryseq1,Request_queryseq2(request),
- invert_first_p,invert_second_p);
+ Shortread_stderr_query_pairedend_fasta(queryseq1,Request_queryseq2(request),
+ invert_first_p,invert_second_p);
}
fprintf(stderr,"\n");
fprintf(stderr,"To obtain a core dump, re-run program on problem sequence with the -0 [zero] flag\n");
@@ -1051,57 +1112,47 @@ single_thread () {
RERAISE;
END_TRY;
-#ifdef MEMUSAGE
- Outbuffer_print_result(outbuffer,result,request,noutput+1);
-#else
- Outbuffer_print_result(outbuffer,result,request);
-#endif
+ Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
- Result_free(&result);
+ if (jobid % POOL_FREE_INTERVAL == 0) {
+ Pairpool_free_memory(pairpool);
+ Diagpool_free_memory(diagpool);
+ Cellpool_free_memory(cellpool);
+ }
#ifdef MEMUSAGE
- /* Run with a single thread (-t 0), which should bring usage back down to 0 after each read */
-#if 0
- printf("Memusage of single thread: standard %ld, keep %ld\n",Mem_usage_report_std_heap(),Mem_usage_report_keep());
- printf("Memusage of OUT: %ld\n",Mem_usage_report_out());
- assert(Mem_usage_report_std() == 0);
- assert(Mem_usage_report_out() == 0);
+ /* Copy acc before we free the request */
+ queryseq1 = Request_queryseq1(request);
+ strncpy(acc,Shortread_accession(queryseq1),100);
+ acc[100] = '\0';
#endif
- queryseq1 = Request_queryseq1(request);
- comma1 = Genomicpos_commafmt(Mem_usage_report_std_heap_max());
- comma2 = Genomicpos_commafmt(Mem_usage_report_std_heap());
- comma3 = Genomicpos_commafmt(Mem_usage_report_keep());
- comma4 = Genomicpos_commafmt(Mem_usage_report_in());
- comma5 = Genomicpos_commafmt(Mem_usage_report_out());
-
- fprintf(stderr,"Acc %s: max %s std %s keep %s in %s out %s\n",
- Shortread_accession(queryseq1),comma1,comma2,comma3,comma4,comma5);
- FREE(comma5);
- FREE(comma4);
- FREE(comma3);
- FREE(comma2);
- FREE(comma1);
-#endif
-
- /* Allocated by fill_buffer in Inbuffer_get_request */
Request_free(&request);
- noutput++;
- if (jobid % POOL_FREE_INTERVAL == 0) {
- Pairpool_free_memory(pairpool);
- Diagpool_free_memory(diagpool);
- Cellpool_free_memory(cellpool);
- }
+#ifdef MEMUSAGE
+ Genomicpos_commafmt_fill(comma1,Mem_usage_report_std_heap_max());
+ Genomicpos_commafmt_fill(comma2,Mem_usage_report_std_heap());
+ Genomicpos_commafmt_fill(comma3,Mem_usage_report_keep());
+ Genomicpos_commafmt_fill(comma4,Mem_usage_report_in());
+ Genomicpos_commafmt_fill(comma5,Mem_usage_report_out());
- }
+ fprintf(stderr,"Acc %s: constant %s max %s std %s keep %s in %s out %s\n",
+ acc,comma0,comma1,comma2,comma3,comma4,comma5);
- /* Except_stack_destroy(); -- requires pthreads */
+ if ((memusage = Mem_usage_report_std_heap()) != 0) {
+ fprintf(stderr,"Memory leak in single thread of %ld bytes\n",memusage);
+ fflush(stdout);
+ exit(9);
+ }
+#endif
+ }
#ifdef MEMUSAGE
Mem_usage_std_heap_add(memusage_constant);
#endif
+ /* Except_stack_destroy(); -- requires pthreads */
+
for (i = 0; i <= MAX_READLENGTH; i++) {
if (floors_array[i] != NULL) {
Floors_free_keep(&(floors_array[i]));
@@ -1121,6 +1172,10 @@ single_thread () {
Oligoindex_array_free(&oligoindices_minor);
Oligoindex_array_free(&oligoindices_major);
+#ifdef MEMUSAGE
+ Mem_usage_set_threadname("main");
+#endif
+
return;
}
@@ -1128,10 +1183,9 @@ single_thread () {
#ifdef HAVE_PTHREAD
static void *
worker_thread (void *data) {
- long int worker_id = (long int) data;
Floors_T *floors_array;
Request_T request;
- Result_T result;
+ Filestring_T fp, fp_failedinput_1, fp_failedinput_2;
Shortread_T queryseq1;
int i;
Stopwatch_T worker_stopwatch;
@@ -1143,15 +1197,24 @@ worker_thread (void *data) {
Diagpool_T diagpool;
Cellpool_T cellpool;
int worker_jobid = 0;
+ double worker_runtime;
+#if defined(DEBUG) || defined(MEMUSAGE)
+ long int worker_id = (long int) data;
+#endif
#ifdef MEMUSAGE
- long int memusage_constant = 0, memusage, max_memusage;
+ long int memusage_constant = 0, memusage;
char threadname[12];
- char *comma1, *comma2, *comma3, *comma4, *comma5;
+ char acc[100+1], comma0[20], comma1[20], comma2[20], comma3[20], comma4[20], comma5[20];
sprintf(threadname,"thread-%ld",worker_id);
Mem_usage_set_threadname(threadname);
#endif
+#ifdef USE_MPI
+ debug(fprintf(stderr,"rank %d, ",myid));
+#endif
+ debug(fprintf(stderr,"worker_thread %ld starting\n",worker_id));
+
/* Thread-specific data and storage */
oligoindices_major = Oligoindex_array_new_major(MAX_QUERYLENGTH_FOR_ALLOC,MAX_GENOMICLENGTH_FOR_ALLOC);
oligoindices_minor = Oligoindex_array_new_minor(MAX_QUERYLENGTH_FOR_ALLOC,MAX_GENOMICLENGTH_FOR_ALLOC);
@@ -1167,41 +1230,39 @@ worker_thread (void *data) {
worker_stopwatch = (timingp == true) ? Stopwatch_new() : (Stopwatch_T) NULL;
floors_array = (Floors_T *) CALLOC(MAX_READLENGTH+1,sizeof(Floors_T));
+
Except_stack_create();
#ifdef MEMUSAGE
memusage_constant += Mem_usage_report_std_heap();
+ Genomicpos_commafmt_fill(comma0,memusage_constant);
Mem_usage_reset_heap_baseline(0);
#endif
while ((request = Inbuffer_get_request(inbuffer)) != NULL) {
- debug(printf("worker_thread %ld got request %d\n",worker_id,Request_id(request)));
+#ifdef USE_MPI
+ debug(fprintf(stderr,"rank %d, ",myid));
+#endif
+ debug(fprintf(stderr,"worker_thread %ld got request %d (%s)\n",
+ worker_id,Request_id(request),Shortread_accession(Request_queryseq1(request))));
pthread_setspecific(global_request_key,(void *) request);
- if (worker_jobid % POOL_FREE_INTERVAL == 0) {
- Pairpool_free_memory(pairpool);
- Diagpool_free_memory(diagpool);
- Cellpool_free_memory(cellpool);
- }
-
#ifdef MEMUSAGE
- memusage = Mem_usage_report_std_heap();
- printf("Memusage of worker thread %ld: %ld\n",worker_id,memusage);
- if (memusage != 0) {
- fprintf(stderr,"Memusage of worker thread %ld: %ld\n",worker_id,memusage);
- fflush(stdout);
- exit(9);
- }
+ queryseq1 = Request_queryseq1(request);
+ fprintf(stderr,"Thread %d starting %s\n",worker_id,Shortread_accession(queryseq1));
+ Mem_usage_reset_stack_max();
Mem_usage_reset_heap_max();
#endif
TRY
-#ifdef MEMUSAGE
- queryseq1 = Request_queryseq1(request);
- fprintf(stderr,"Thread %d starting %s\n",worker_id,Shortread_accession(queryseq1));
-#endif
- result = process_request(request,floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,worker_stopwatch);
+ fp = process_request(&fp_failedinput_1,&fp_failedinput_2,&worker_runtime,
+ request,floors_array,oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,worker_stopwatch);
+ if (timingp == true) {
+ queryseq1 = Request_queryseq1(request);
+ printf("%s\t%.6f\n",Shortread_accession(queryseq1),worker_runtime);
+ }
+
ELSE
queryseq1 = Request_queryseq1(request);
if (queryseq1 == NULL) {
@@ -1214,10 +1275,10 @@ worker_thread (void *data) {
}
fprintf(stderr,"\n");
if (Request_queryseq2(request) == NULL) {
- Shortread_print_query_singleend_fasta(stderr,queryseq1,/*headerseq*/queryseq1);
+ Shortread_stderr_query_singleend_fasta(queryseq1,/*headerseq*/queryseq1);
} else {
- Shortread_print_query_pairedend_fasta(stderr,queryseq1,Request_queryseq2(request),
- invert_first_p,invert_second_p);
+ Shortread_stderr_query_pairedend_fasta(queryseq1,Request_queryseq2(request),
+ invert_first_p,invert_second_p);
}
fprintf(stderr,"\n");
fprintf(stderr,"To obtain a core dump, re-run program on problem sequence with the -0 [zero] flag\n");
@@ -1227,28 +1288,39 @@ worker_thread (void *data) {
RERAISE;
END_TRY;
+ Outbuffer_put_filestrings(outbuffer,fp,fp_failedinput_1,fp_failedinput_2);
+
+ if (worker_jobid % POOL_FREE_INTERVAL == 0) {
+ Pairpool_free_memory(pairpool);
+ Diagpool_free_memory(diagpool);
+ Cellpool_free_memory(cellpool);
+ }
+
#ifdef MEMUSAGE
+ /* Copy acc before we free the request */
queryseq1 = Request_queryseq1(request);
- comma1 = Genomicpos_commafmt(Mem_usage_report_std_heap_max());
- comma2 = Genomicpos_commafmt(Mem_usage_report_std_heap());
- comma3 = Genomicpos_commafmt(Mem_usage_report_keep());
- comma4 = Genomicpos_commafmt(Mem_usage_report_in());
- comma5 = Genomicpos_commafmt(Mem_usage_report_out());
-
- fprintf(stderr,"Acc %s, thread %d: max %s std %s keep %s in %s out %s\n",
- Shortread_accession(queryseq1),worker_id,comma1,comma2,comma3,comma4,comma5);
- FREE(comma5);
- FREE(comma4);
- FREE(comma3);
- FREE(comma2);
- FREE(comma1);
+ strncpy(acc,Shortread_accession(queryseq1),100);
+ acc[100] = '\0';
#endif
- debug(printf("worker_thread putting result %d\n",Result_id(result)));
+ Request_free(&request);
+
+#ifdef MEMUSAGE
+ Genomicpos_commafmt_fill(comma1,Mem_usage_report_std_heap_max());
+ Genomicpos_commafmt_fill(comma2,Mem_usage_report_std_heap());
+ Genomicpos_commafmt_fill(comma3,Mem_usage_report_keep());
+ Genomicpos_commafmt_fill(comma4,Mem_usage_report_in());
+ Genomicpos_commafmt_fill(comma5,Mem_usage_report_out());
- Outbuffer_put_result(outbuffer,result,request);
+ fprintf(stderr,"Acc %s, thread %ld: constant %s max %s std %s keep %s in %s out %s\n",
+ acc,worker_id,comma0,comma1,comma2,comma3,comma4,comma5); /* worker_id is a long int, hence %ld */
- /* Don't free result or request; done by outbuffer thread */
+ if ((memusage = Mem_usage_report_std_heap()) != 0) {
+ fprintf(stderr,"Memory leak in worker thread %ld of %ld bytes\n",worker_id,memusage);
+ fflush(stdout);
+ exit(9);
+ }
+#endif
}
#ifdef MEMUSAGE
@@ -1276,6 +1348,15 @@ worker_thread (void *data) {
Oligoindex_array_free(&oligoindices_minor);
Oligoindex_array_free(&oligoindices_major);
+#ifdef MEMUSAGE
+ Mem_usage_set_threadname("main");
+#endif
+
+#ifdef USE_MPI
+ debug(fprintf(stderr,"rank %d, ",myid));
+#endif
+ debug(fprintf(stderr,"worker_thread %ld finished\n",worker_id));
+
return (void *) NULL;
}
#endif
@@ -1497,62 +1578,22 @@ check_valid_float_or_int (char *string) {
}
-int
-main (int argc, char *argv[]) {
- char *genomesubdir = NULL, *snpsdir = NULL, *modedir = NULL, *mapdir = NULL, *iitfile = NULL, *fileroot = NULL;
- FILE *input = NULL, *input2 = NULL;
-#ifdef HAVE_ZLIB
- gzFile gzipped = NULL, gzipped2 = NULL;
-#endif
-
-#ifdef HAVE_BZLIB
- Bzip2_T bzipped = NULL, bzipped2 = NULL;
-#endif
-
- bool multiple_sequences_p = false;
- char **files;
- int nfiles, nextchar = '\0';
- long int worker_id;
-
- unsigned int nread;
- double runtime;
-
- Splicestringpool_T splicestringpool;
-
-#ifdef HAVE_PTHREAD
- int ret;
- pthread_attr_t thread_attr_join;
-#ifdef WORKER_DETACH
- pthread_attr_t thread_attr_detach;
-#endif
-#endif
-
+static int
+parse_command_line (int argc, char *argv[], int optind) {
int opt, c;
- extern int optind;
extern char *optarg;
int long_option_index = 0;
const char *long_name;
char **argstart;
char *string;
-#ifdef HAVE_SIGACTION
- struct sigaction signal_action;
-#endif
-
-#ifdef MEMUSAGE
- Mem_usage_init();
- Mem_usage_set_threadname("main");
-#endif
-
-
fprintf(stderr,"GSNAP version %s called with args:",PACKAGE_VERSION);
argstart = &(argv[-optind]);
for (c = 1; c < argc + optind; c++) {
- fprintf(stderr," %s",argstart[c]);
+ fprintf(stderr," %s",argstart[c]);
}
fprintf(stderr,"\n");
-
while ((opt = getopt_long(argc,argv,
"D:d:k:Gq:o:a:N:M:m:i:y:Y:z:Z:w:E:e:J:K:l:g:s:V:v:B:t:A:j:0n:QO",
long_options, &long_option_index)) != -1) {
@@ -1561,14 +1602,14 @@ main (int argc, char *argv[]) {
long_name = long_options[long_option_index].name;
if (!strcmp(long_name,"version")) {
print_program_version();
- exit(0);
+ return 1;
} else if (!strcmp(long_name,"check")) {
check_compiler_assumptions();
- exit(0);
+ return 1;
} else if (!strcmp(long_name,"help")) {
print_program_usage();
- exit(0);
-
+ return 1;
+
#ifdef LARGE_GENOMES
} else if (!strcmp(long_name,"use-sarray")) {
if (!strcmp(optarg,"0")) {
@@ -1576,8 +1617,9 @@ main (int argc, char *argv[]) {
use_only_sarray_p = false;
} else {
fprintf(stderr,"--use-sarray flag for large genomes must be 0\n");
- exit(9);
+ return 9;
}
+
#else
} else if (!strcmp(long_name,"use-sarray")) {
if (!strcmp(optarg,"2")) {
@@ -1591,10 +1633,20 @@ main (int argc, char *argv[]) {
use_only_sarray_p = false;
} else {
fprintf(stderr,"--use-sarray flag must be 0, 1, or 2\n");
- exit(9);
+ return 9;
}
#endif
+ } else if (!strcmp(long_name,"use-shared-memory")) {
+ if (!strcmp(optarg,"1")) {
+ sharedp = true;
+ } else if (!strcmp(optarg,"0")) {
+ sharedp = false;
+ } else {
+ fprintf(stderr,"--use-shared-memory flag must be 0 or 1\n");
+ return 9;
+ }
+
} else if (!strcmp(long_name,"expand-offsets")) {
if (!strcmp(optarg,"1")) {
expand_offsets_p = true;
@@ -1602,7 +1654,7 @@ main (int argc, char *argv[]) {
expand_offsets_p = false;
} else {
fprintf(stderr,"--expand-offsets flag must be 0 or 1\n");
- exit(9);
+ return 9;
}
} else if (!strcmp(long_name,"sampling")) {
@@ -1630,29 +1682,43 @@ main (int argc, char *argv[]) {
mode = ATOI_NONSTRANDED;
} else {
fprintf(stderr,"--mode must be standard, cmet-stranded, cmet-nonstranded, atoi-stranded, or atoi-nonstranded\n");
- exit(9);
+ return 9;
}
} else if (!strcmp(long_name,"cmetdir")) {
user_cmetdir = optarg;
+
} else if (!strcmp(long_name,"atoidir")) {
user_atoidir = optarg;
} else if (!strcmp(long_name,"novelend-splicedist")) {
- shortsplicedist_novelend = strtoul(optarg,NULL,10);
+ shortsplicedist_novelend = (Chrpos_T) strtoul(optarg,NULL,10);
} else if (!strcmp(long_name,"splicingdir")) {
user_splicingdir = optarg;
+
} else if (!strcmp(long_name,"ambig-splice-noclip")) {
amb_clip_p = false;
+ } else if (!strcmp(long_name,"find-dna-chimeras")) {
+ if (!strcmp(optarg,"1")) {
+ find_dna_chimeras_p = true;
+ } else if (!strcmp(optarg,"0")) {
+ find_dna_chimeras_p = false;
+ } else {
+ fprintf(stderr,"--find-dna-chimeras flag must be 0 or 1\n");
+ return 9; /* report the bad value to the caller, like every other option error here, rather than exiting */
+ }
+
} else if (!strcmp(long_name,"tallydir")) {
user_tallydir = optarg;
+
} else if (!strcmp(long_name,"use-tally")) {
tally_root = optarg;
} else if (!strcmp(long_name,"runlengthdir")) {
user_runlengthdir = optarg;
+
} else if (!strcmp(long_name,"use-runlength")) {
runlength_root = optarg;
@@ -1666,44 +1732,58 @@ main (int argc, char *argv[]) {
} else if (!strcmp(long_name,"trigger-score-for-gmap")) {
trigger_score_for_gmap = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"gmap-min-match-length")) {
gmap_min_nconsecutive = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"gmap-allowance")) {
gmap_allowance = atoi(check_valid_int(optarg));
} else if (!strcmp(long_name,"max-gmap-pairsearch")) {
max_gmap_pairsearch = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"max-gmap-terminal")) {
max_gmap_terminal = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"max-gmap-improvement")) {
max_gmap_improvement = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"microexon-spliceprob")) {
microexon_spliceprob = check_valid_float(optarg,long_name);
+
} else if (!strcmp(long_name,"stage2-start")) {
+ /* No longer used by stage 2 */
suboptimal_score_start = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"stage2-end")) {
suboptimal_score_end = atoi(check_valid_int(optarg));
} else if (!strcmp(long_name,"input-buffer-size")) {
inbuffer_nspaces = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"output-buffer-size")) {
output_buffer_size = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"barcode-length")) {
barcode_length = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"fastq-id-start")) {
acc_fieldi_start = atoi(check_valid_int(optarg)) - 1;
if (acc_fieldi_start < 0) {
fprintf(stderr,"Value for fastq-id-start must be 1 or greater\n");
- exit(9);
+ return 9;
}
+
} else if (!strcmp(long_name,"fastq-id-end")) {
acc_fieldi_end = atoi(check_valid_int(optarg)) - 1;
if (acc_fieldi_end < 0) {
fprintf(stderr,"Value for fastq-id-end must be 1 or greater\n");
- exit(9);
+ return 9;
}
+
} else if (!strcmp(long_name,"force-single-end")) {
force_single_end_p = true;
+
} else if (!strcmp(long_name,"filter-chastity")) {
if (!strcmp(optarg,"off")) {
filter_chastity_p = false;
@@ -1716,8 +1796,9 @@ main (int argc, char *argv[]) {
filter_if_both_p = true;
} else {
fprintf(stderr,"--filter-chastity values allowed: off, either, both\n");
- exit(9);
+ return 9;
}
+
} else if (!strcmp(long_name,"allow-pe-name-mismatch")) {
allow_paired_end_mismatch_p = true;
@@ -1725,16 +1806,35 @@ main (int argc, char *argv[]) {
} else if (!strcmp(long_name,"gunzip")) {
gunzip_p = true;
#endif
+
#ifdef HAVE_BZLIB
} else if (!strcmp(long_name,"bunzip2")) {
bunzip2_p = true;
#endif
+
+ } else if (!strcmp(long_name,"orientation")) {
+ if (!strcmp(optarg,"FR")) {
+ invert_first_p = false;
+ invert_second_p = true;
+ } else if (!strcmp(optarg,"RF")) {
+ invert_first_p = true;
+ invert_second_p = false;
+ } else if (!strcmp(optarg,"FF")) {
+ invert_first_p = invert_second_p = false;
+ } else {
+ fprintf(stderr,"Currently allowed values for orientation: FR (fwd-rev), RF (rev-fwd) or FF (fwd-fwd)\n");
+ return 9;
+ }
+
} else if (!strcmp(long_name,"split-output")) {
- sevenway_root = optarg;
+ split_output_root = optarg;
+
} else if (!strcmp(long_name,"failed-input")) {
failedinput_root = optarg;
+
} else if (!strcmp(long_name,"append-output")) {
appendp = true;
+
} else if (!strcmp(long_name,"order-among-best")) {
if (!strcmp(optarg,"genomic")) {
want_random_p = false;
@@ -1742,14 +1842,18 @@ main (int argc, char *argv[]) {
want_random_p = true;
} else {
fprintf(stderr,"--order-among-best values allowed: genomic, random (default)\n");
- exit(9);
+ return 9;
}
+
} else if (!strcmp(long_name,"pairmax-dna")) {
pairmax_dna = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"pairmax-rna")) {
pairmax_rna = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"pairexpect")) {
expected_pairlength = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"pairdev")) {
pairlength_deviation = atoi(check_valid_int(optarg));
@@ -1782,8 +1886,9 @@ main (int argc, char *argv[]) {
query_unk_mismatch_p = false;
} else {
fprintf(stderr,"--query-unk-mismatch flag must be 0 or 1\n");
- exit(9);
+ return 9;
}
+
} else if (!strcmp(long_name,"genome-unk-mismatch")) {
if (!strcmp(optarg,"1")) {
genome_unk_mismatch_p = true;
@@ -1791,35 +1896,49 @@ main (int argc, char *argv[]) {
genome_unk_mismatch_p = false;
} else {
fprintf(stderr,"--genome-unk-mismatch flag must be 0 or 1\n");
- exit(9);
+ return 9;
}
} else if (!strcmp(long_name,"trim-mismatch-score")) {
trim_mismatch_score = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"trim-indel-score")) {
trim_indel_score = atoi(check_valid_int(optarg));
+ } else if (!strcmp(long_name,"distant-splice-identity")) {
+ min_distantsplicing_identity = check_valid_float(optarg,long_name);
+
+ } else if (!strcmp(long_name,"force-xs-dir")) {
+ force_xs_direction_p = true;
+
} else if (!strcmp(long_name,"show-refdiff")) {
show_refdiff_p = true;
+
} else if (!strcmp(long_name,"clip-overlap")) {
clip_overlap_p = true;
+
} else if (!strcmp(long_name,"merge-overlap")) {
merge_overlap_p = true;
+
} else if (!strcmp(long_name,"no-sam-headers")) {
sam_headers_p = false;
+
} else if (!strcmp(long_name,"sam-headers-batch")) {
sam_headers_batch = atoi(check_valid_int(optarg));
+
} else if (!strcmp(long_name,"sam-use-0M")) {
sam_insert_0M_p = true;
+
} else if (!strcmp(long_name,"sam-multiple-primaries")) {
sam_multiple_primaries_p = true;
+
} else if (!strcmp(long_name,"quality-protocol")) {
if (user_quality_score_adj == true) {
fprintf(stderr,"Cannot specify both -J (--quality-zero-score) and --quality-protocol\n");
- exit(9);
+ return 9;
} else if (user_quality_shift == true) {
fprintf(stderr,"Cannot specify both -j (--quality-print-shift) and --quality-protocol\n");
- exit(9);
+ return 9;
} else if (!strcmp(optarg,"illumina")) {
MAPQ_init(/*quality_score_adj*/64);
Pair_init(/*quality_score_adj*/64);
@@ -1834,93 +1953,67 @@ main (int argc, char *argv[]) {
user_quality_shift = true;
} else {
fprintf(stderr,"The only values allowed for --quality-protocol are illumina or sanger\n");
- exit(9);
+ return 9;
}
- } else if (!strcmp(long_name,"force-xs-dir")) {
- force_xs_direction_p = true;
} else if (!strcmp(long_name,"md-lowercase-snp")) {
md_lowercase_variant_p = true;
+
} else if (!strcmp(long_name,"extend-soft-clips")) {
hide_soft_clips_p = true;
- } else if (!strcmp(long_name,"action-if-cigar-error")) {
- if (!strcmp(optarg,"ignore")) {
- cigar_action = CIGAR_ACTION_IGNORE;
- } else if (!strcmp(optarg,"warning")) {
- cigar_action = CIGAR_ACTION_WARNING;
- } else if (!strcmp(optarg,"abort")) {
- cigar_action = CIGAR_ACTION_ABORT;
- } else {
- fprintf(stderr,"action-if-cigar-error needs to be ignore, warning, or abort\n");
- exit(9);
- }
+
} else if (!strcmp(long_name,"read-group-id")) {
sam_read_group_id = optarg;
+
} else if (!strcmp(long_name,"read-group-name")) {
sam_read_group_name = optarg;
+
} else if (!strcmp(long_name,"read-group-library")) {
sam_read_group_library = optarg;
+
} else if (!strcmp(long_name,"read-group-platform")) {
sam_read_group_platform = optarg;
- } else if (!strcmp(long_name,"goby-output")) {
- goby_output_root = optarg;
- } else if (!strcmp(long_name,"distant-splice-identity")) {
- min_distantsplicing_identity = check_valid_float(optarg,long_name);
+
} else if (!strcmp(long_name,"print-snps")) {
print_snplabels_p = true;
+
} else if (!strcmp(long_name,"failsonly")) {
if (nofailsp == true) {
fprintf(stderr,"Cannot specify both --nofails and --failsonly\n");
- exit(9);
+ return 9;
} else {
failsonlyp = true;
}
} else if (!strcmp(long_name,"nofails")) {
if (failsonlyp == true) {
fprintf(stderr,"Cannot specify both --nofails and --failsonly\n");
- exit(9);
+ return 9;
} else {
nofailsp = true;
}
- } else if (!strcmp(long_name,"creads-window-start")) {
- creads_window_start = strtoul(check_valid_int(optarg),NULL,10);
- } else if (!strcmp(long_name,"creads-window-end")) {
- creads_window_end = strtoul(check_valid_int(optarg),NULL,10);
- } else if (!strcmp(long_name,"creads-complement")) {
- creads_complement_p = true;
} else {
/* Shouldn't reach here */
fprintf(stderr,"Don't recognize option %s. For usage, run 'gsnap --help'",long_name);
- exit(9);
+ return 9;
}
break;
case 'D': user_genomedir = optarg; break;
+
case 'd': dbroot = optarg; break;
+
case 'k':
required_index1part = atoi(check_valid_int(optarg));
if (required_index1part > 16) {
fprintf(stderr,"The value for k-mer size must be 16 or less\n");
- exit(9);
+ return 9;
}
break;
+
case 'G': uncompressedp = true; break;
case 'q': parse_part(&part_modulus,&part_interval,optarg); break;
- case 'o':
- if (!strcmp(optarg,"FR")) {
- invert_first_p = false;
- invert_second_p = true;
- } else if (!strcmp(optarg,"RF")) {
- invert_first_p = true;
- invert_second_p = false;
- } else if (!strcmp(optarg,"FF")) {
- invert_first_p = invert_second_p = false;
- } else {
- fprintf(stderr,"Currently allowed values for orientation (-o): FR (fwd-rev), RF (rev-fwd) or FF (fwd-fwd)\n");
- exit(9);
- }
- break;
+ case 'o': output_file = optarg; break;
case 'a':
if (!strcmp(optarg,"paired")) {
@@ -1929,7 +2022,7 @@ main (int argc, char *argv[]) {
chop_primers_p = false;
} else {
fprintf(stderr,"Currently allowed values for adapter stripping (-a): off, paired\n");
- exit(9);
+ return 9;
}
break;
@@ -1940,7 +2033,7 @@ main (int argc, char *argv[]) {
novelsplicingp = false;
} else {
fprintf(stderr,"Novel splicing (-N flag) must be 0 or 1\n");
- exit(9);
+ return 9;
}
break;
@@ -1964,7 +2057,7 @@ main (int argc, char *argv[]) {
fprintf(stderr," then no masking if necessary;\n");
fprintf(stderr," mode 4 does greedy masking of frequent and repetitive oligomers,\n");
fprintf(stderr," then no masking if necessary.\n");
- exit(9);
+ return 9;
}
break;
#endif
@@ -1974,11 +2067,11 @@ main (int argc, char *argv[]) {
user_maxlevel_float = atof(check_valid_float_or_int(optarg));
if (user_maxlevel_float > 1.0 && user_maxlevel_float != rint(user_maxlevel_float)) {
fprintf(stderr,"Cannot specify fractional value %f for --max-mismatches except between 0.0 and 1.0\n",user_maxlevel_float);
- exit(9);
+ return 9;
} else if (user_maxlevel_float > 0.10 && user_maxlevel_float < 1.0) {
fprintf(stderr,"Your value %f for --max-mismatches implies more than 10%% mismatches, which does not make sense\n",
user_maxlevel_float);
- exit(9);
+ return 9;
}
break;
@@ -1998,51 +2091,75 @@ main (int argc, char *argv[]) {
case 'g': genes_file = optarg; break;
- case 's': splicing_file = optarg; knownsplicingp = true; break;
+ case 's':
+ splicing_file = optarg;
+ knownsplicingp = true;
+ break;
case 'V': user_snpsdir = optarg; break;
+
case 'v': snps_root = optarg; break;
case 'B':
if (!strcmp(optarg,"5")) {
+#if 0
+ /* Not true. -B 5 allocates suffix array and suffix aux files */
fprintf(stderr,"Note: Batch mode 5 is now the same as batch mode 4.\n");
- fprintf(stderr,"Expansion of offsets is now controlled separately by --expand-offsets (default=1).\n");
+ fprintf(stderr,"Expansion of offsets is now controlled separately by --expand-offsets (default=0).\n");
+#endif
offsetsstrm_access = USE_ALLOCATE; /* Doesn't matter */
positions_access = USE_ALLOCATE;
genome_access = USE_ALLOCATE;
sarray_access = USE_ALLOCATE;
- aux_access = USE_ALLOCATE;
+ lcp_access = USE_ALLOCATE;
+ guideexc_access = USE_ALLOCATE;
+ indexij_access = USE_ALLOCATE;
+
+#ifdef HAVE_MMAP
} else if (!strcmp(optarg,"4")) {
offsetsstrm_access = USE_ALLOCATE;
positions_access = USE_ALLOCATE;
genome_access = USE_ALLOCATE;
sarray_access = USE_MMAP_PRELOAD;
- aux_access = USE_ALLOCATE;
-#ifdef HAVE_MMAP
+ lcp_access = USE_MMAP_PRELOAD;
+ guideexc_access = USE_ALLOCATE;
+ indexij_access = USE_ALLOCATE;
+
} else if (!strcmp(optarg,"3")) {
offsetsstrm_access = USE_ALLOCATE;
positions_access = USE_ALLOCATE;
genome_access = USE_MMAP_PRELOAD; /* was batch_genome_p = true */
sarray_access = USE_MMAP_ONLY;
- aux_access = USE_MMAP_PRELOAD;
+ lcp_access = USE_MMAP_PRELOAD;
+ guideexc_access = USE_MMAP_PRELOAD;
+ indexij_access = USE_ALLOCATE;
+
} else if (!strcmp(optarg,"2")) {
offsetsstrm_access = USE_ALLOCATE; /* was batch_offsets_p = true */
positions_access = USE_MMAP_PRELOAD; /* was batch_positions_p = true */
genome_access = USE_MMAP_PRELOAD; /* was batch_genome_p = true */
sarray_access = USE_MMAP_ONLY;
- aux_access = USE_MMAP_ONLY;
+ lcp_access = USE_MMAP_ONLY;
+ guideexc_access = USE_MMAP_ONLY;
+ indexij_access = USE_ALLOCATE;
+
} else if (!strcmp(optarg,"1")) {
offsetsstrm_access = USE_ALLOCATE; /* was batch_offsets_p = true */
positions_access = USE_MMAP_PRELOAD; /* was batch_positions_p = true */
genome_access = USE_MMAP_ONLY; /* was batch_genome_p = false */
sarray_access = USE_MMAP_ONLY;
- aux_access = USE_MMAP_ONLY;
+ guideexc_access = USE_MMAP_ONLY;
+ indexij_access = USE_ALLOCATE;
+
} else if (!strcmp(optarg,"0")) {
offsetsstrm_access = USE_ALLOCATE; /* was batch_offsets_p = true */
positions_access = USE_MMAP_ONLY; /* was batch_positions_p = false */
genome_access = USE_MMAP_ONLY; /* was batch_genome_p = false */
sarray_access = USE_MMAP_ONLY;
- aux_access = USE_MMAP_ONLY;
+ lcp_access = USE_MMAP_ONLY;
+ guideexc_access = USE_MMAP_ONLY;
+ indexij_access = USE_ALLOCATE;
+
#endif
} else {
#ifdef HAVE_MMAP
@@ -2050,12 +2167,12 @@ main (int argc, char *argv[]) {
#else
fprintf(stderr,"Batch mode %s not recognized. Only allow 4-5, since mmap is disabled. Run 'gsnap --help' for more information.\n",optarg);
#endif
- exit(9);
+ return 9;
}
break;
-#ifdef HAVE_PTHREAD
- case 't': nworkers = atoi(check_valid_int(optarg)); break;
+#if defined(HAVE_PTHREAD)
+ case 't': nthreads = atoi(check_valid_int(optarg)); break;
#else
case 't': fprintf(stderr,"This version of GSNAP has pthreads disabled, so ignoring the value of %s for -t\n",optarg); break;
#endif
@@ -2063,20 +2180,18 @@ main (int argc, char *argv[]) {
case 'A':
if (!strcmp(optarg,"sam")) {
output_sam_p = true;
- } else if (!strcmp(optarg,"goby")) {
- output_goby_p = true;
} else if (!strcmp(optarg,"m8")) {
print_m8_p = true;
} else {
- fprintf(stderr,"Output format %s not recognized. Allowed values: sam, m8, goby\n",optarg);
- exit(9);
+ fprintf(stderr,"Output format %s not recognized. Allowed values: sam, m8\n",optarg);
+ return 9;
}
break;
case 'j':
if (user_quality_shift == true) {
fprintf(stderr,"Cannot specify both -j (--quality-print-shift) and --quality-protocol\n");
- exit(9);
+ return 9;
} else {
quality_shift = atoi(check_valid_int(optarg));
user_quality_shift = true;
@@ -2086,7 +2201,7 @@ main (int argc, char *argv[]) {
case 'J':
if (user_quality_score_adj == true) {
fprintf(stderr,"Cannot specify both -J (--quality-zero-score) and --quality-protocol\n");
- exit(9);
+ return 9;
} else {
MAPQ_init(/*quality_score_adj*/atoi(check_valid_int(optarg)));
Pair_init(/*quality_score_adj*/atoi(check_valid_int(optarg)));
@@ -2100,64 +2215,43 @@ main (int argc, char *argv[]) {
case 'O': orderedp = true; break;
- case '?': fprintf(stderr,"For usage, run 'gsnap --help'\n"); exit(9);
- default: exit(9);
+ case '?': fprintf(stderr,"For usage, run 'gsnap --help'\n"); return 9;
+ default: return 9;
}
}
- argc -= optind;
- argv += optind;
-
-
- check_compiler_assumptions();
-
- if (exception_raise_p == false) {
- fprintf(stderr,"Allowing signals and exceptions to pass through\n");
- Except_inactivate();
- } else {
-#ifdef HAVE_SIGACTION
- signal_action.sa_handler = signal_handler;
- signal_action.sa_flags = 0;
- sigfillset(&signal_action.sa_mask);
-
- sigaction(SIGFPE,&signal_action,NULL);
- sigaction(SIGSEGV,&signal_action,NULL);
- sigaction(SIGTRAP,&signal_action,NULL);
- sigaction(SIGUSR1,&signal_action,NULL);
-#endif
- }
-
+ /* Make inferences */
if (dbroot == NULL) {
fprintf(stderr,"Need to specify the -d flag. For usage, run 'gsnap --help'\n");
/* print_program_usage(); */
- exit(9);
+ return 9;
}
if (acc_fieldi_end < acc_fieldi_start) {
fprintf(stderr,"--fastq-id-end must be equal to or greater than --fastq-id-start\n");
- exit(9);
- } else {
- Shortread_setup(acc_fieldi_start,acc_fieldi_end,force_single_end_p,filter_chastity_p,
- allow_paired_end_mismatch_p);
+ return 9;
}
if (clip_overlap_p == true && merge_overlap_p == true) {
fprintf(stderr,"Cannot specify both --clip-overlap and --merge-overlap. Please choose one.\n");
- exit(9);
+ return 9;
}
if (novelsplicingp == true && knownsplicingp == true) {
fprintf(stderr,"Novel splicing (-N) and known splicing (-s) both turned on => assume reads are RNA-Seq\n");
+ find_dna_chimeras_p = false;
pairmax = pairmax_rna;
shortsplicedist_known = shortsplicedist;
} else if (knownsplicingp == true) {
fprintf(stderr,"Known splicing (-s) turned on => assume reads are RNA-Seq\n");
+ find_dna_chimeras_p = false;
pairmax = pairmax_rna;
shortsplicedist_known = shortsplicedist;
} else if (novelsplicingp == true) {
fprintf(stderr,"Novel splicing (-N) turned on => assume reads are RNA-Seq\n");
+ find_dna_chimeras_p = false;
pairmax = pairmax_rna;
shortsplicedist_known = 0;
@@ -2178,7 +2272,7 @@ main (int argc, char *argv[]) {
if (distantsplicing_penalty < localsplicing_penalty) {
fprintf(stderr,"The distant splicing penalty %d cannot be less than local splicing penalty %d\n",
distantsplicing_penalty,localsplicing_penalty);
- exit(9);
+ return 9;
}
if (sam_headers_batch >= 0) {
@@ -2200,202 +2294,303 @@ main (int argc, char *argv[]) {
/* orientation FR */
} else {
fprintf(stderr,"Adapter stripping not currently implemented for given orientation\n");
- exit(9);
+ return 9;
}
}
+#ifdef USE_MPI
+ /* Code does allow for MPI output to stdout, but appears not to work
+ yet, and may not work if rank 0 is also a worker */
+ if (split_output_root == NULL && output_file == NULL) {
+ fprintf(stderr,"For MPI version, need to specify either --split-output or --output-file\n");
+ return 9;
+ }
+#endif
+
+ return 0;
+}
+
+
+static bool
+open_input_streams_parser (int *nextchar, int *nchars1, int *nchars2, char ***files, int *nfiles,
+ FILE **input, FILE **input2,
+#ifdef HAVE_ZLIB
+ gzFile *gzipped, gzFile *gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ Bzip2_T *bzipped, Bzip2_T *bzipped2,
+#endif
+ bool gunzip_p, bool bunzip2_p,
+ int argc, char **argv) {
+ bool fastq_format_p = false;
+
+ *input = *input2 = NULL;
+#ifdef HAVE_ZLIB
+ *gzipped = *gzipped2 = NULL;
+#endif
+#ifdef HAVE_BZLIB
+ *bzipped = *bzipped2 = NULL;
+#endif
/* Open input stream and peek at first char */
if (argc == 0) {
+#ifdef USE_MPI
+ fprintf(stderr,"For mpi_gsnap, cannot read from stdin\n");
+ exit(9);
+#else
fprintf(stderr,"Reading from stdin\n");
- input = stdin;
- files = (char **) NULL;
- nfiles = 0;
- nextchar = Shortread_input_init(input);
- if (nextchar == 0xFF) {
- fprintf(stderr,"Input appears to be a compact-reads file, which is not allowed as stdin.\n");
- exit(9);
- }
+ *input = stdin;
+ *files = (char **) NULL;
+ *nfiles = 0;
+ *nextchar = Shortread_input_init(&(*nchars1),*input);
+#endif
} else {
- files = argv;
- nfiles = argc;
+ *files = argv;
+ *nfiles = argc;
if (gunzip_p == true) {
#ifdef HAVE_ZLIB
- if ((gzipped = gzopen(files[0],"rb")) == NULL) {
- fprintf(stderr,"Cannot open gzipped file %s\n",files[0]);
+ if ((*gzipped = gzopen((*files)[0],"rb")) == NULL) {
+ fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]);
exit(9);
} else {
#ifdef HAVE_ZLIB_GZBUFFER
- gzbuffer(gzipped,GZBUFFER_SIZE);
+ gzbuffer(*gzipped,GZBUFFER_SIZE);
#endif
- nextchar = Shortread_input_init_gzip(gzipped);
+ *nextchar = Shortread_input_init_gzip(*gzipped);
}
#endif
} else if (bunzip2_p == true) {
#ifdef HAVE_BZLIB
- if ((bzipped = Bzip2_new(files[0])) == NULL) {
- fprintf(stderr,"Cannot open bzipped file %s\n",files[0]);
+ if ((*bzipped = Bzip2_new((*files)[0])) == NULL) {
+ fprintf(stderr,"Cannot open bzipped file %s\n",(*files)[0]);
exit(9);
} else {
- nextchar = Shortread_input_init_bzip2(bzipped);
+ *nextchar = Shortread_input_init_bzip2(*bzipped);
}
#endif
} else {
- if ((input = FOPEN_READ_TEXT(files[0])) == NULL) {
- fprintf(stderr,"Cannot open file %s\n",files[0]);
+ if ((*input = FOPEN_READ_TEXT((*files)[0])) == NULL) {
+ fprintf(stderr,"Cannot open file %s\n",(*files)[0]);
exit(9);
} else {
- nextchar = Shortread_input_init(input);
- if (nextchar == 0xFF) {
- fclose(input);
- input = (FILE *) NULL;
- gobyreader = Goby_reader_new(files,nfiles,creads_window_start,creads_window_end,creads_complement_p);
- creads_format_p = true;
- }
+ debugf(fprintf(stderr,"Master opening file %s using fopen\n",(*files)[0]));
+ *nextchar = Shortread_input_init(&(*nchars1),*input);
}
}
- files++;
- nfiles--;
+ (*files)++;
+ (*nfiles)--;
}
/* Interpret first char to determine input type */
- if (nextchar == EOF) {
+ if (*nextchar == EOF) {
fprintf(stderr,"Input is empty\n");
exit(9);
-#ifdef HAVE_GOBY
- } else if (creads_format_p == true) {
- if (user_quality_score_adj == false) {
- /* Use Goby default of 0, keeping Phred scores. It is not
- recommended that you override this value with -J x when
- reading from Goby compact reads files. */
- MAPQ_init(/*quality_score_adj*/0);
- Pair_init(/*quality_score_adj*/0);
- }
- if (user_quality_shift == false) {
- /* By default, when outputting a non-Goby compact alignment
- format (gsnap, sam), this will output Sanger quality scores,
- equivalent to "-j 33". If you prefer to output Illumina
- quality scores, use "-j 64". Goby compact alignment output
- always uses Phred scores, ignoring this quality_shift value. */
- quality_shift = 33;
- }
-#endif
-
- } else if (nextchar == '@') {
+ } else if (*nextchar == '@') {
/* Looks like a FASTQ file */
- if (nfiles == 0 || force_single_end_p == true) {
+ if (*nfiles == 0 || force_single_end_p == true) {
#ifdef HAVE_ZLIB
- gzipped2 = (gzFile) NULL;
+ *gzipped2 = (gzFile) NULL;
#endif
#ifdef HAVE_BZLIB
- bzipped2 = (Bzip2_T) NULL;
+ *bzipped2 = (Bzip2_T) NULL;
#endif
- input2 = (FILE *) NULL;
+ *input2 = (FILE *) NULL;
} else {
if (gunzip_p == true) {
#ifdef HAVE_ZLIB
- if ((gzipped2 = gzopen(files[0],"rb")) == NULL) {
- fprintf(stderr,"Cannot open gzipped file %s\n",files[0]);
+ if ((*gzipped2 = gzopen((*files)[0],"rb")) == NULL) {
+ fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]);
exit(9);
} else {
#ifdef HAVE_ZLIB_GZBUFFER
- gzbuffer(gzipped2,GZBUFFER_SIZE);
+ gzbuffer(*gzipped2,GZBUFFER_SIZE);
#endif
- /* nextchar2 = */ Shortread_input_init_gzip(gzipped2);
+ /* nextchar2 = */ Shortread_input_init_gzip(*gzipped2);
}
#endif
} else if (bunzip2_p == true) {
#ifdef HAVE_BZLIB
- if ((bzipped2 = Bzip2_new(files[0])) == NULL) {
- fprintf(stderr,"Cannot open bzip2 file %s\n",files[0]);
+ if ((*bzipped2 = Bzip2_new((*files)[0])) == NULL) {
+ fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[0]);
exit(9);
} else {
- /* nextchar2 = */ Shortread_input_init_bzip2(bzipped2);
+ /* nextchar2 = */ Shortread_input_init_bzip2(*bzipped2);
}
#endif
} else {
- if ((input2 = FOPEN_READ_TEXT(files[0])) == NULL) {
- fprintf(stderr,"Cannot open file %s\n",files[0]);
+ if ((*input2 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
+ fprintf(stderr,"Cannot open file %s\n",(*files)[0]);
exit(9);
} else {
- /* nextchar2 = */ Shortread_input_init(input2);
+ debugf(fprintf(stderr,"Master opening file %s using fopen\n",(*files)[0]));
+ /* nextchar2 = */ Shortread_input_init(&(*nchars2),*input2);
}
}
- files++;
- nfiles--;
+ (*files)++;
+ (*nfiles)--;
}
fastq_format_p = true;
- } else if (nextchar == '>') {
+ } else if (*nextchar == '>') {
/* Looks like a FASTA file */
} else {
- fprintf(stderr,"First char is %c. Expecting either '>' for FASTA or '@' for FASTQ format.\n",nextchar);
+ fprintf(stderr,"First char is %c. Expecting either '>' for FASTA or '@' for FASTQ format.\n",*nextchar);
exit(9);
}
+ return fastq_format_p;
+}
+
- /* Read in first batch of sequences */
- inbuffer = Inbuffer_new(nextchar,input,input2,
+#ifdef USE_MPI
+static void
+open_input_streams_worker (char ***files, int *nfiles,
+#if defined(USE_MPI_FILE_INPUT)
+ MPI_File *input, MPI_File *input2, MPI_Comm workers_comm,
+#else
+ FILE **input, FILE **input2,
+#endif
#ifdef HAVE_ZLIB
- gzipped,gzipped2,
+ gzFile *gzipped, gzFile *gzipped2,
#endif
#ifdef HAVE_BZLIB
- bzipped,bzipped2,
+ Bzip2_T *bzipped, Bzip2_T *bzipped2,
+#endif
+ bool gunzip_p, bool bunzip2_p, bool fastq_format_p,
+ int argc, char **argv) {
+
+ *input = *input2 = NULL;
+#ifdef HAVE_ZLIB
+ *gzipped = *gzipped2 = NULL;
#endif
-#ifdef HAVE_GOBY
- gobyreader,
+#ifdef HAVE_BZLIB
+ *bzipped = *bzipped2 = NULL;
#endif
- files,nfiles,fastq_format_p,creads_format_p,
- barcode_length,invert_first_p,invert_second_p,chop_primers_p,
- inbuffer_nspaces,inbuffer_maxchars,part_interval,part_modulus,
- filter_if_both_p);
- nread = Inbuffer_fill_init(inbuffer);
+ /* Open input stream and peek at first char */
+ if (argc == 0) {
+ fprintf(stderr,"For mpi_gsnap, cannot read from stdin\n");
+ exit(9);
- if (nread > 1) {
- multiple_sequences_p = true;
- if (offsetsstrm_access != USE_ALLOCATE || genome_access != USE_ALLOCATE ||
- sarray_access != USE_ALLOCATE || aux_access != USE_ALLOCATE) {
- fprintf(stderr,"Note: >1 sequence detected, so index files are being memory mapped.\n");
- fprintf(stderr," GSNAP can run slowly at first while the computer starts to accumulate\n");
- fprintf(stderr," pages from the hard disk into its cache. To copy index files into RAM\n");
- fprintf(stderr," instead of memory mapping, use -B 3, -B 4, or -B 5, if you have enough RAM.\n");
-#ifdef HAVE_PTHREAD
- fprintf(stderr," For more speed, also try multiple threads (-t <int>), if you have multiple processors or cores.");
+ } else {
+ *files = argv;
+ *nfiles = argc;
+
+ if (gunzip_p == true) {
+#ifdef HAVE_ZLIB
+ if ((*gzipped = gzopen((*files)[0],"rb")) == NULL) {
+ fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]);
+ exit(9);
+ } else {
+#ifdef HAVE_ZLIB_GZBUFFER
+ gzbuffer(*gzipped,GZBUFFER_SIZE);
+#endif
+ }
+#endif
+
+ } else if (bunzip2_p == true) {
+#ifdef HAVE_BZLIB
+ if ((*bzipped = Bzip2_new((*files)[0])) == NULL) {
+ fprintf(stderr,"Cannot open bzipped file %s\n",(*files)[0]);
+ exit(9);
+ }
+#endif
+
+ } else {
+#if defined(USE_MPI_FILE_INPUT)
+ if ((*input = MPI_fopen((*files)[0],workers_comm)) == NULL) {
+ fprintf(stderr,"Cannot open file %s\n",(*files)[0]);
+ exit(9);
+ }
+ debugf(fprintf(stderr,"Slave opening file %s using MPI_File_open\n",(*files)[0]));
+#else
+ if ((*input = FOPEN_READ_TEXT((*files)[0])) == NULL) {
+ fprintf(stderr,"Cannot open file %s\n",(*files)[0]);
+ exit(9);
+ }
+ debugf(fprintf(stderr,"Slave opening file %s using fopen\n",(*files)[0]));
#endif
- fprintf(stderr,"\n");
}
- } else {
- /* multiple_sequences_p = false; */
- /* fprintf(stderr,"Note: only 1 sequence detected. Ignoring batch (-B) command\n"); */
- expand_offsets_p = false;
-#ifdef HAVE_MMAP
- offsetsstrm_access = USE_MMAP_ONLY;
- positions_access = USE_MMAP_ONLY;
- genome_access = USE_MMAP_ONLY;
- sarray_access = USE_MMAP_ONLY;
- aux_access = USE_MMAP_ONLY;
+
+ (*files)++;
+ (*nfiles)--;
+ }
+
+ if (fastq_format_p == true) {
+ /* Looks like a FASTQ file */
+ if (*nfiles == 0 || force_single_end_p == true) {
+#ifdef HAVE_ZLIB
+ *gzipped2 = (gzFile) NULL;
+#endif
+#ifdef HAVE_BZLIB
+ *bzipped2 = (Bzip2_T) NULL;
+#endif
+#if defined(USE_MPI_FILE_INPUT)
+ *input2 = (MPI_File) NULL;
#else
- offsetsstrm_access = USE_ALLOCATE;
- positions_access = USE_ALLOCATE;
- genome_access = USE_ALLOCATE;
- sarray_access = USE_ALLOCATE;
- aux_access = USE_ALLOCATE;
+ *input2 = (FILE *) NULL;
+#endif
+ } else {
+ if (gunzip_p == true) {
+#ifdef HAVE_ZLIB
+ if ((*gzipped2 = gzopen((*files)[0],"rb")) == NULL) {
+ fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]);
+ exit(9);
+ } else {
+#ifdef HAVE_ZLIB_GZBUFFER
+ gzbuffer(*gzipped2,GZBUFFER_SIZE);
+#endif
+ }
+#endif
+
+ } else if (bunzip2_p == true) {
+#ifdef HAVE_BZLIB
+ if ((*bzipped2 = Bzip2_new((*files)[0])) == NULL) {
+ fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[0]);
+ exit(9);
+ }
+#endif
+
+ } else {
+#if defined(USE_MPI_FILE_INPUT)
+ if ((*input2 = MPI_fopen((*files)[0],workers_comm)) == NULL) {
+ fprintf(stderr,"Cannot open file %s\n",(*files)[0]);
+ exit(9);
+ }
+ debugf(fprintf(stderr,"Slave opening file %s using MPI_File_open\n",(*files)[0]));
+#else
+ if ((*input2 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
+ fprintf(stderr,"Cannot open file %s\n",(*files)[0]);
+ exit(9);
+ }
+ debugf(fprintf(stderr,"Slave opening file %s using fopen\n",(*files)[0]));
#endif
+ }
+ (*files)++;
+ (*nfiles)--;
+ }
}
+ return;
+}
+#endif
- /* Prepare genomic data */
- genomesubdir = Datadir_find_genomesubdir(&fileroot,&dbversion,user_genomedir,dbroot);
+static Univ_IIT_T
+chromosome_iit_setup (int *nchromosomes, int *circular_typeint, bool *any_circular_p, bool **circularp,
+ char *genomesubdir, char *fileroot) {
+ Univ_IIT_T chromosome_iit = NULL;
+ char *iitfile = NULL;
+
+ /* Prepare genomic data */
iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
@@ -2410,32 +2605,41 @@ main (int argc, char *argv[]) {
exit(9);
#endif
} else {
- nchromosomes = Univ_IIT_total_nintervals(chromosome_iit);
- circular_typeint = Univ_IIT_typeint(chromosome_iit,"circular");
- circularp = Univ_IIT_circularp(chromosome_iit);
+ *nchromosomes = Univ_IIT_total_nintervals(chromosome_iit);
+ *circular_typeint = Univ_IIT_typeint(chromosome_iit,"circular");
+ *circularp = Univ_IIT_circularp(&(*any_circular_p),chromosome_iit);
}
FREE(iitfile);
+ return chromosome_iit;
+}
+
+
+static void
+worker_setup (char *genomesubdir, char *fileroot) {
+ char *snpsdir = NULL, *modedir = NULL, *mapdir = NULL, *iitfile = NULL;
+ Splicestringpool_T splicestringpool;
if (snps_root == NULL) {
genomecomp = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
genomebits = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
+
#ifndef LARGE_GENOMES
if (use_sarray_p == true) {
if (mode == STANDARD) {
- if ((sarray_fwd = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,aux_access,
- mode,/*fwdp*/true)) == NULL) {
+ if ((sarray_fwd = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,lcp_access,
+ guideexc_access,indexij_access,sharedp,mode,/*fwdp*/true)) == NULL) {
use_sarray_p = false;
} else {
sarray_rev = sarray_fwd;
}
} else {
- if ((sarray_fwd = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,aux_access,
- mode,/*fwdp*/true)) == NULL ||
- (sarray_rev = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,aux_access,
- mode,/*fwdp*/false)) == NULL) {
+ if ((sarray_fwd = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,lcp_access,
+ guideexc_access,indexij_access,sharedp,mode,/*fwdp*/true)) == NULL ||
+ (sarray_rev = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,lcp_access,
+ guideexc_access,indexij_access,sharedp,mode,/*fwdp*/false)) == NULL) {
use_sarray_p = false;
}
}
@@ -2451,7 +2655,7 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
genomesubdir,fileroot,/*idx_filesuffix*/"dibase",/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP color mode\n",fileroot,"dibase");
exit(9);
}
@@ -2468,7 +2672,7 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metct",/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -2476,7 +2680,7 @@ main (int argc, char *argv[]) {
if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metga",/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -2491,7 +2695,7 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2499,7 +2703,7 @@ main (int argc, char *argv[]) {
if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2510,7 +2714,7 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP\n",fileroot,IDX_FILESUFFIX);
exit(9);
}
@@ -2528,29 +2732,29 @@ main (int argc, char *argv[]) {
/* SNPs */
genomecomp = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
genomecomp_alt = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
genomebits = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_BITS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
genomebits_alt = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_BITS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,sharedp);
#ifndef LARGE_GENOMES
if (use_sarray_p == true) {
fprintf(stderr,"Note: Suffix arrays will bias against SNP-tolerant alignment. For bias-free alignment, set --use-sarray=0\n");
if (mode == STANDARD) {
- if ((sarray_fwd = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,aux_access,
- mode,/*fwdp*/true)) == NULL) {
+ if ((sarray_fwd = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,lcp_access,
+ guideexc_access,indexij_access,sharedp,mode,/*fwdp*/true)) == NULL) {
use_sarray_p = false;
} else {
sarray_rev = sarray_fwd;
}
} else {
- if ((sarray_fwd = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,aux_access,
- mode,/*fwdp*/true)) == NULL ||
- (sarray_rev = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,aux_access,
- mode,/*fwdp*/false)) == NULL) {
+ if ((sarray_fwd = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,lcp_access,
+ guideexc_access,indexij_access,sharedp,mode,/*fwdp*/true)) == NULL ||
+ (sarray_rev = Sarray_new(genomesubdir,fileroot,/*snps_root*/NULL,sarray_access,lcp_access,
+ guideexc_access,indexij_access,sharedp,mode,/*fwdp*/false)) == NULL) {
use_sarray_p = false;
}
}
@@ -2572,14 +2776,14 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metct",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metga",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -2594,14 +2798,14 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -2610,7 +2814,7 @@ main (int argc, char *argv[]) {
indexdb = Indexdb_new_genome(&index1part,&index1interval,
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
required_index1part,required_index1interval,
- expand_offsets_p,offsetsstrm_access,positions_access);
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp);
if (indexdb == NULL) {
fprintf(stderr,"Cannot find snps index file for %s in directory %s\n",snps_root,snpsdir);
exit(9);
@@ -2650,7 +2854,6 @@ main (int argc, char *argv[]) {
}
if (use_only_sarray_p == false) {
- Dynprog_init(mode);
Compoundpos_init_positions_free(Indexdb_positions_fileio_p(indexdb));
Spanningelt_init_positions_free(Indexdb_positions_fileio_p(indexdb));
Stage1_init_positions_free(Indexdb_positions_fileio_p(indexdb));
@@ -2893,39 +3096,6 @@ main (int argc, char *argv[]) {
fprintf(stderr,"done\n");
}
- FREE(genomesubdir);
- FREE(fileroot);
-
-
-#ifdef HAVE_GOBY
- Goby_setup(show_refdiff_p);
-
- /* Setup outbuffer */
- if (output_goby_p == true) {
- if (goby_output_root == NULL) {
- fprintf(stderr,"--goby-output must be specified for Goby output. For usage, run 'gsnap --help'\n");
- /* print_program_usage(); */
- exit(9);
- } else if (creads_format_p == false) {
- fprintf(stderr,"Currently can only write Goby if you read from compact reads files\n");
- exit(9);
- } else {
- gobywriter = Goby_writer_new(goby_output_root,"gsnap",PACKAGE_VERSION);
- Goby_writer_add_chromosomes(gobywriter,chromosome_iit);
- }
- }
-
- if (gobywriter && failedinput_root != NULL) {
- fprintf(stderr,"Goby output doesn't support the --failed-input option. Turning it off.\n");
- failedinput_root = NULL;
- }
- if (gobywriter && sevenway_root != NULL) {
- fprintf(stderr,"Goby output doesn't support the --split-output option. Turning it off.\n");
- sevenway_root = NULL;
- }
-#endif
-
-
Genome_setup(genomecomp,genomecomp_alt,mode,circular_typeint);
#ifndef LARGE_GENOMES
@@ -2946,6 +3116,30 @@ main (int argc, char *argv[]) {
}
Genome_sites_setup(Genome_blocks(genomecomp),/*snp_blocks*/genomecomp_alt ? Genome_blocks(genomecomp_alt) : NULL);
Maxent_hr_setup(Genome_blocks(genomecomp),/*snp_blocks*/genomecomp_alt ? Genome_blocks(genomecomp_alt) : NULL);
+
+ Dynprog_init(mode); /* Need Dynprog for sarray_gmap */
+ Dynprog_single_setup(/*homopolymerp*/false);
+ Dynprog_genome_setup(novelsplicingp,splicing_iit,splicing_divint_crosstable,
+ donor_typeint,acceptor_typeint);
+ Dynprog_end_setup(splicesites,splicetypes,splicedists,nsplicesites,
+ trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max);
+ Pair_setup(trim_mismatch_score,trim_indel_score,/*gff3_separators_p*/false,sam_insert_0M_p,
+ force_xs_direction_p,md_lowercase_variant_p,
+ /*snps_p*/snps_iit ? true : false,print_nsnpdiffs_p,
+ Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false));
+ Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
+ /*require_splicedir_p*/true,splicing_iit,splicing_divint_crosstable,
+ donor_typeint,acceptor_typeint,
+ splicesites,min_intronlength,max_deletionlength,min_indel_end_matches,
+ maxpeelback_distalmedial,nullgap,extramaterial_end,extramaterial_paired,
+ extraband_single,extraband_end,extraband_paired,
+ ngap,/*maxintronlen*/shortsplicedist,
+ output_sam_p,/*homopolymerp*/false,/*stage3debug*/NO_STAGE3DEBUG);
+ Oligoindex_hr_setup(Genome_blocks(genomecomp),mode);
+ Stage2_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,/*cross_species_p*/false,
+ suboptimal_score_start,suboptimal_score_end,sufflookback,nsufflookback,
+ /*maxintronlen*/shortsplicedist,mode,/*snps_p*/snps_iit ? true : false);
+
if (use_only_sarray_p == true) {
spansize = 1;
} else {
@@ -2953,26 +3147,6 @@ main (int argc, char *argv[]) {
Indexdb_hr_setup(index1part);
Oligo_setup(index1part);
spansize = Spanningelt_setup(index1part,index1interval);
-
- Dynprog_single_setup(/*homopolymerp*/false);
- Dynprog_genome_setup(novelsplicingp,splicing_iit,splicing_divint_crosstable,
- donor_typeint,acceptor_typeint);
- Dynprog_end_setup(splicesites,splicetypes,splicedists,nsplicesites,
- trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max);
- Oligoindex_hr_setup(Genome_blocks(genomecomp),mode);
- Stage2_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,/*cross_species_p*/false,
- suboptimal_score_start,suboptimal_score_end,
- mode,/*snps_p*/snps_iit ? true : false);
- Pair_setup(trim_mismatch_score,trim_indel_score,/*gff3_separators_p*/false,sam_insert_0M_p,
- force_xs_direction_p,md_lowercase_variant_p,
- /*snps_p*/snps_iit ? true : false,
- Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
- cigar_action);
- Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
- /*require_splicedir_p*/true,splicing_iit,splicing_divint_crosstable,
- donor_typeint,acceptor_typeint,
- splicesites,min_intronlength,max_deletionlength,min_indel_end_matches,
- output_sam_p,/*homopolymerp*/false,/*stage3debug*/NO_STAGE3DEBUG);
}
Splicetrie_setup(splicecomp,splicesites,splicefrags_ref,splicefrags_alt,
@@ -2981,9 +3155,9 @@ main (int argc, char *argv[]) {
Splice_setup(min_shortend);
Indel_setup(min_indel_end_matches,indel_penalty_middle);
Stage1hr_setup(use_sarray_p,use_only_sarray_p,index1part,index1interval,spansize,chromosome_iit,nchromosomes,
- genomecomp_alt,mode,maxpaths_search,terminal_threshold,reject_trimlength,
+ genomecomp,genomecomp_alt,mode,maxpaths_search,terminal_threshold,reject_trimlength,
splicesites,splicetypes,splicedists,nsplicesites,
- novelsplicingp,knownsplicingp,distances_observed_p,
+ novelsplicingp,knownsplicingp,find_dna_chimeras_p,distances_observed_p,
subopt_levels,max_middle_insertions,max_middle_deletions,
shortsplicedist,shortsplicedist_known,shortsplicedist_novelend,min_intronlength,
min_distantsplicing_end_matches,min_distantsplicing_identity,
@@ -2999,135 +3173,53 @@ main (int argc, char *argv[]) {
novelsplicingp,knownsplicingp,output_sam_p,mode,
Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
reject_trimlength);
- Stage3hr_setup(invert_first_p,invert_second_p,genes_iit,genes_divint_crosstable,
+ Stage3hr_setup(invert_first_p,invert_second_p,genomecomp,chromosome_iit,nchromosomes,circular_typeint,
+ genes_iit,genes_divint_crosstable,
tally_iit,tally_divint_crosstable,runlength_iit,runlength_divint_crosstable,
reject_trimlength,distances_observed_p,pairmax,
expected_pairlength,pairlength_deviation,
localsplicing_penalty,indel_penalty_middle,antistranded_penalty,
favor_multiexon_p,gmap_min_nconsecutive,index1part,index1interval,novelsplicingp,
- merge_samechr_p,circularp,failedinput_root,fastq_format_p,print_m8_p,want_random_p);
+ merge_samechr_p,circularp,failedinput_root,print_m8_p,want_random_p);
SAM_setup(quiet_if_excessive_p,maxpaths_report,failedinput_root,fastq_format_p,hide_soft_clips_p,
- sam_multiple_primaries_p,force_xs_direction_p,md_lowercase_variant_p,snps_iit);
+ clip_overlap_p,merge_overlap_p,sam_multiple_primaries_p,
+ force_xs_direction_p,md_lowercase_variant_p,snps_iit,chromosome_iit,genomecomp);
+ Output_setup(chromosome_iit,nofailsp,failsonlyp,quiet_if_excessive_p,maxpaths_report,
+ failedinput_root,quality_shift,
+ output_sam_p,print_m8_p,invert_first_p,invert_second_p,
+ merge_samechr_p,sam_read_group_id);
- outbuffer = Outbuffer_new(output_buffer_size,nread,sevenway_root,failedinput_root,appendp,
- chromosome_iit,timingp,
- output_sam_p,sam_headers_p,sam_read_group_id,sam_read_group_name,
- sam_read_group_library,sam_read_group_platform,
- nworkers,orderedp,gobywriter,nofailsp,failsonlyp,
- fastq_format_p,clip_overlap_p,merge_overlap_p,merge_samechr_p,print_m8_p,
- maxpaths_report,quiet_if_excessive_p,quality_shift,
- invert_first_p,invert_second_p,pairmax,argc,argv,optind);
+ return;
+}
- Inbuffer_set_outbuffer(inbuffer,outbuffer);
- fprintf(stderr,"Starting alignment\n");
- stopwatch = Stopwatch_new();
- Stopwatch_start(stopwatch);
+static void
+worker_cleanup () {
-#ifndef HAVE_PTHREAD
- single_thread();
-#else
- if (nworkers == 0) {
- single_thread();
+ if (use_only_sarray_p == false) {
+ Stage1hr_cleanup();
+ }
- } else if (multiple_sequences_p == false) {
- single_thread();
+ Dynprog_term();
- } else {
-#ifdef WORKER_DETACH
- pthread_attr_init(&thread_attr_detach);
- if ((ret = pthread_attr_setdetachstate(&thread_attr_detach,PTHREAD_CREATE_DETACHED)) != 0) {
- fprintf(stderr,"ERROR: pthread_attr_setdetachstate returned %d\n",ret);
- exit(1);
+ if (indexdb2 != indexdb) {
+ Indexdb_free(&indexdb2);
+ }
+ if (indexdb != NULL) {
+ Indexdb_free(&indexdb);
+ }
+ if (dbversion != NULL) {
+ FREE(dbversion);
+ }
+#ifndef LARGE_GENOMES
+ if (sarray_fwd != NULL && sarray_rev != NULL) {
+ if (mode == STANDARD) {
+ Sarray_free(&sarray_fwd);
+ } else {
+ Sarray_free(&sarray_rev);
+ Sarray_free(&sarray_fwd);
}
-#endif
- pthread_attr_init(&thread_attr_join);
- if ((ret = pthread_attr_setdetachstate(&thread_attr_join,PTHREAD_CREATE_JOINABLE)) != 0) {
- fprintf(stderr,"ERROR: pthread_attr_setdetachstate returned %d\n",ret);
- exit(1);
- }
-
- worker_thread_ids = (pthread_t *) CALLOC(nworkers,sizeof(pthread_t));
-
- Except_init_pthread();
- pthread_key_create(&global_request_key,NULL);
-
- if (orderedp == true) {
- pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_ordered,
- (void *) outbuffer);
- } else {
- pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_anyorder,
- (void *) outbuffer);
- }
- for (worker_id = 0; worker_id < nworkers; worker_id++) {
-#ifdef WORKER_DETACH
- pthread_create(&(worker_thread_ids[worker_id]),&thread_attr_detach,worker_thread,(void *) worker_id);
-#else
- /* Need to have worker threads finish before we call Inbuffer_free() */
- pthread_create(&(worker_thread_ids[worker_id]),&thread_attr_join,worker_thread,(void *) worker_id);
-#endif
- }
-
- pthread_join(output_thread_id,NULL);
- for (worker_id = 0; worker_id < nworkers; worker_id++) {
- pthread_join(worker_thread_ids[worker_id],NULL);
- }
-
- pthread_key_delete(global_request_key);
- /* Do not delete global_except_key, because worker threads might still need it */
- /* Except_term_pthread(); */
-
- FREE(worker_thread_ids);
- }
-#endif /* HAVE_PTHREAD */
-
- runtime = Stopwatch_stop(stopwatch);
- Stopwatch_free(&stopwatch);
-
- nread = Outbuffer_nread(outbuffer);
- fprintf(stderr,"Processed %u queries in %.2f seconds (%.2f queries/sec)\n",
- nread,runtime,(double) nread/runtime);
-
- Outbuffer_free(&outbuffer);
- Inbuffer_free(&inbuffer); /* Also closes inputs, except for Goby */
-
- if (output_goby_p == true) {
- Goby_writer_finish(gobywriter,gobyreader);
- Goby_writer_free(&gobywriter);
- }
- if (creads_format_p == true) {
- Goby_reader_finish(gobyreader);
- Goby_reader_free(&gobyreader);
- }
-
-#ifdef HAVE_GOBY
- /* Always call this, even if not using goby */
- Goby_shutdown();
-#endif
-
- if (use_only_sarray_p == false) {
- Dynprog_term();
- Stage1hr_cleanup();
- }
-
- if (indexdb2 != indexdb) {
- Indexdb_free(&indexdb2);
- }
- if (indexdb != NULL) {
- Indexdb_free(&indexdb);
- }
- if (dbversion != NULL) {
- FREE(dbversion);
- }
-#ifndef LARGE_GENOMES
- if (sarray_fwd != NULL && sarray_rev != NULL) {
- if (mode == STANDARD) {
- Sarray_free(&sarray_fwd);
- } else {
- Sarray_free(&sarray_rev);
- Sarray_free(&sarray_fwd);
- }
- }
+ }
#endif
if (genomecomp_alt != NULL) {
Genome_free(&genomecomp_alt);
@@ -3194,6 +3286,488 @@ main (int argc, char *argv[]) {
Univ_IIT_free(&chromosome_iit);
}
+ Access_controlled_cleanup();
+
+ return;
+}
+
+
+int
+main (int argc, char *argv[]) {
+ int nchars1 = 0, nchars2 = 0;
+ bool multiple_sequences_p;
+ int cmdline_status;
+
+ char *genomesubdir, *fileroot, *dbversion;
+ char **files;
+ int nfiles;
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ MPI_File mpi_file_input, mpi_file_input_2;
+#endif
+
+#ifdef USE_MPI
+ Master_T master;
+ bool master_is_worker_p;
+ char **files_master;
+ int nfiles_master;
+ FILE *input_parser, *input2_parser;
+#endif
+ FILE *input, *input2;
+
+#ifdef HAVE_ZLIB
+#ifdef USE_MPI
+ gzFile gzipped_master, gzipped2_master;
+#endif
+ gzFile gzipped, gzipped2;
+#endif
+
+#ifdef HAVE_BZLIB
+#ifdef USE_MPI
+ Bzip2_T bzipped_master, bzipped2_master;
+#endif
+ Bzip2_T bzipped, bzipped2;
+#endif
+
+ long int worker_id;
+
+ int nread;
+ int nextchar = '\0';
+ double runtime;
+
+#ifdef HAVE_PTHREAD
+ int ret;
+ pthread_attr_t thread_attr_join;
+#ifdef USE_MPI
+ pthread_attr_t thread_attr_detach;
+#endif
+#endif
+
+#ifdef HAVE_SIGACTION
+ struct sigaction signal_action;
+#endif
+
+ extern int optind;
+
+#ifdef MEMUSAGE
+ Mem_usage_init();
+ Mem_usage_set_threadname("main");
+#endif
+
+
+ cmdline_status = parse_command_line(argc,argv,optind);
+ argc -= optind;
+ argv += optind;
+
+ if (cmdline_status == 0) {
+ /* okay to continue */
+ } else if (cmdline_status == 1) {
+ exit(0);
+ } else {
+ exit(cmdline_status);
+ }
+
+ check_compiler_assumptions();
+
+ if (exception_raise_p == false) {
+ fprintf(stderr,"Allowing signals and exceptions to pass through\n");
+ Except_inactivate();
+ } else {
+#ifdef HAVE_SIGACTION
+ signal_action.sa_handler = signal_handler;
+ signal_action.sa_flags = 0;
+ sigfillset(&signal_action.sa_mask); /* After first signal, block all other signals */
+
+ /* Note: SIGKILL and SIGSTOP cannot be caught */
+
+ sigaction(SIGABRT,&signal_action,NULL); /* abnormal termination (abort) */
+ sigaction(SIGBUS,&signal_action,NULL); /* bus error */
+ sigaction(SIGFPE,&signal_action,NULL); /* arithmetic exception */
+ sigaction(SIGHUP,&signal_action,NULL); /* hangup */
+ sigaction(SIGILL,&signal_action,NULL); /* illegal hardware instruction */
+ sigaction(SIGINT,&signal_action,NULL); /* terminal interruption (control-C) */
+ sigaction(SIGPIPE,&signal_action,NULL); /* write to pipe with no readers */
+ sigaction(SIGQUIT,&signal_action,NULL); /* terminal quit (control-backslash) */
+ sigaction(SIGSEGV,&signal_action,NULL); /* invalid memory reference */
+ sigaction(SIGSYS,&signal_action,NULL); /* invalid system call */
+ sigaction(SIGTERM,&signal_action,NULL); /* Unix kill command */
+ sigaction(SIGTRAP,&signal_action,NULL); /* hardware fault */
+ sigaction(SIGXCPU,&signal_action,NULL); /* CPU limit exceeded */
+ sigaction(SIGXFSZ,&signal_action,NULL); /* file size limit exceeded */
+#endif
+ }
+
+#ifdef USE_MPI
+ /* MPI_Init(&argc,&argv); */
+ MPI_Init_thread(&argc,&argv,/*requested*/MPI_THREAD_MULTIPLE,&provided);
+ MPI_Comm_rank(MPI_COMM_WORLD,&myid);
+ MPI_Comm_size(MPI_COMM_WORLD,&nranks);
+ MPI_Debug_setup(myid);
+
+ if ((nthreads0 = nthreads - 1) <= 0) {
+ /* Exclude master rank 0 from workers_group */
+ exclude_ranks[0] = 0;
+ MPI_Comm_group(MPI_COMM_WORLD,&world_group);
+ MPI_Group_excl(world_group,1,exclude_ranks,&workers_group);
+ MPI_Comm_create(MPI_COMM_WORLD,workers_group,&workers_comm);
+ MPI_Group_free(&workers_group);
+ MPI_Group_free(&world_group);
+ master_is_worker_p = false;
+
+ } else {
+ /* Include master rank 0 in workers group */
+ MPI_Comm_group(MPI_COMM_WORLD,&world_group);
+ MPI_Comm_create(MPI_COMM_WORLD,world_group,&workers_comm);
+ MPI_Group_free(&world_group);
+ master_is_worker_p = true;
+ }
+ n_slave_ranks = nranks - 1; /* Don't include master, even if it's a worker */
+
+ if (myid == 0) {
+ nthreads = nthreads0;
+ fastq_format_p = open_input_streams_parser(&nextchar,&nchars1,&nchars2,
+ &files_master,&nfiles_master,&input_parser,&input2_parser,
+#ifdef HAVE_ZLIB
+ &gzipped_master,&gzipped2_master,
+#endif
+#ifdef HAVE_BZLIB
+ &bzipped_master,&bzipped2_master,
+#endif
+ gunzip_p,bunzip2_p,argc,argv);
+ master = Master_new(n_slave_ranks,nextchar,nchars1,nchars2,
+ input_parser,input2_parser,
+#ifdef HAVE_ZLIB
+ gzipped_master,gzipped2_master,
+#endif
+#ifdef HAVE_BZLIB
+ bzipped_master,bzipped2_master,
+#endif
+ files_master,nfiles_master,inbuffer_nspaces,part_modulus,part_interval);
+ }
+
+ MPI_Bcast(&fastq_format_p,1,MPI_BOOL_T,/*root*/0,MPI_COMM_WORLD);
+ MPI_Bcast(&nextchar,1,MPI_CHAR,/*root*/0,MPI_COMM_WORLD);
+
+ /* If not using MPI_File, then master already has input and input2,
+ and does not need mpi_file_input or mpi_file_input_2 (because of the workers_comm) */
+ if (myid > 0 || master_is_worker_p == true) {
+ open_input_streams_worker(&files,&nfiles,
+#ifdef USE_MPI_FILE_INPUT
+ &mpi_file_input,&mpi_file_input_2,workers_comm,
+#else
+ &input,&input2,
+#endif
+#ifdef HAVE_ZLIB
+ &gzipped,&gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ &bzipped,&bzipped2,
+#endif
+ gunzip_p,bunzip2_p,fastq_format_p,argc,argv);
+
+ /* Inbuffer_master_process skips to part_modulus, so workers need it set to 0 */
+ Inbuffer_setup(filter_if_both_p,
+#ifdef USE_MPI_FILE_INPUT
+ workers_comm,
+#endif
+ /*part_modulus*/0,part_interval);
+
+ inbuffer = Inbuffer_new(nextchar,myid,
+#ifdef USE_MPI_FILE_INPUT
+ mpi_file_input,mpi_file_input_2,
+#else
+ input,input2,
+#endif
+#ifdef HAVE_ZLIB
+ gzipped,gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ bzipped,bzipped2,
+#endif
+ files,nfiles,inbuffer_nspaces);
+ }
+
+ Shortread_setup(acc_fieldi_start,acc_fieldi_end,force_single_end_p,filter_chastity_p,
+ allow_paired_end_mismatch_p,fastq_format_p,barcode_length,
+ invert_first_p,invert_second_p);
+ multiple_sequences_p = true;
+
+
+#else
+ /* Non-MPI version */
+ fastq_format_p = open_input_streams_parser(&nextchar,&nchars1,&nchars2,&files,&nfiles,&input,&input2,
+#ifdef HAVE_ZLIB
+ &gzipped,&gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ &bzipped,&bzipped2,
+#endif
+ gunzip_p,bunzip2_p,argc,argv);
+
+ Inbuffer_setup(filter_if_both_p,part_modulus,part_interval);
+
+ inbuffer = Inbuffer_new(nextchar,input,input2,
+#ifdef HAVE_ZLIB
+ gzipped,gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ bzipped,bzipped2,
+#endif
+ files,nfiles,inbuffer_nspaces);
+
+ Shortread_setup(acc_fieldi_start,acc_fieldi_end,force_single_end_p,filter_chastity_p,
+ allow_paired_end_mismatch_p,fastq_format_p,barcode_length,
+ invert_first_p,invert_second_p);
+
+ if ((nread = Inbuffer_fill_init(inbuffer)) > 1) {
+ multiple_sequences_p = true;
+ } else {
+ multiple_sequences_p = false;
+ }
+#endif
+
+ if (multiple_sequences_p == true) {
+ if (offsetsstrm_access != USE_ALLOCATE || genome_access != USE_ALLOCATE ||
+ sarray_access != USE_ALLOCATE || lcp_access != USE_ALLOCATE) {
+ fprintf(stderr,"Note: >1 sequence detected, so index files are being memory mapped.\n");
+ fprintf(stderr," GSNAP can run slowly at first while the computer starts to accumulate\n");
+ fprintf(stderr," pages from the hard disk into its cache. To copy index files into RAM\n");
+ fprintf(stderr," instead of memory mapping, use -B 3, -B 4, or -B 5, if you have enough RAM.\n");
+#ifdef HAVE_PTHREAD
+ fprintf(stderr," For more speed, also try multiple threads (-t <int>), if you have multiple processors or cores.");
+#endif
+ fprintf(stderr,"\n");
+ }
+
+ } else {
+ /* fprintf(stderr,"Note: only 1 sequence detected. Ignoring batch (-B) command\n"); */
+ expand_offsets_p = false;
+#ifdef HAVE_MMAP
+ offsetsstrm_access = USE_MMAP_ONLY;
+ positions_access = USE_MMAP_ONLY;
+ genome_access = USE_MMAP_ONLY;
+ sarray_access = USE_MMAP_ONLY;
+ lcp_access = USE_MMAP_ONLY;
+ guideexc_access = USE_MMAP_ONLY;
+ indexij_access = USE_MMAP_ONLY;
+#else
+ /* No choice, since mmap is not available */
+ offsetsstrm_access = USE_ALLOCATE;
+ positions_access = USE_ALLOCATE;
+ genome_access = USE_ALLOCATE;
+ sarray_access = USE_ALLOCATE;
+ lcp_access = USE_ALLOCATE;
+ guideexc_access = USE_ALLOCATE;
+ indexij_access = USE_ALLOCATE;
+#endif
+ }
+
+ genomesubdir = Datadir_find_genomesubdir(&fileroot,&dbversion,user_genomedir,dbroot);
+ FREE(dbversion);
+ chromosome_iit = chromosome_iit_setup(&nchromosomes,&circular_typeint,&any_circular_p,&circularp,
+ genomesubdir,fileroot);
+ Outbuffer_setup(argc,argv,optind,chromosome_iit,any_circular_p,
+ nthreads,orderedp,quiet_if_excessive_p,
+ output_sam_p,sam_headers_p,sam_read_group_id,sam_read_group_name,
+ sam_read_group_library,sam_read_group_platform,
+ appendp,output_file,split_output_root,failedinput_root);
+
+#if defined(USE_MPI) && defined(HAVE_PTHREAD)
+ /* Needed for Master_parser and possibly Master_write_stdout, which never terminate */
+ pthread_attr_init(&thread_attr_detach);
+ if ((ret = pthread_attr_setdetachstate(&thread_attr_detach,PTHREAD_CREATE_DETACHED)) != 0) {
+ fprintf(stderr,"ERROR: pthread_attr_setdetachstate returned %d\n",ret);
+ exit(1);
+ }
+#endif
+
+#ifdef USE_MPI
+ if (myid == 0 && master_is_worker_p == false) {
+ FREE(genomesubdir);
+ FREE(fileroot);
+
+ /* Master rank, which is not a worker */
+ if (output_file != NULL) {
+ fprintf(stderr,"Starting alignment. Writing results to %s\n",output_file);
+ } else if (split_output_root != NULL) {
+ fprintf(stderr,"Starting alignment. Writing results to %s.*\n",split_output_root);
+ } else {
+ fprintf(stderr,"Starting alignment\n");
+ }
+
+ stopwatch = Stopwatch_new();
+ Stopwatch_start(stopwatch);
+
+ if (split_output_root == NULL && output_file == NULL) {
+ pthread_create(&write_stdout_thread_id,&thread_attr_detach,Master_write_stdout,(void *) NULL);
+ }
+ pthread_create(&parser_thread_id,&thread_attr_detach,Master_parser,(void *) master);
+ Master_mpi_interface((void *) master); /* Can run as a normal procedure, not as a thread */
+
+ } else {
+ worker_setup(genomesubdir,fileroot);
+ FREE(genomesubdir);
+ FREE(fileroot);
+ MPI_Barrier(workers_comm);
+
+ outbuffer = Outbuffer_new(output_buffer_size,/*nread*/0);
+ Inbuffer_set_outbuffer(inbuffer,outbuffer);
+ /* MPI worker ranks continue on with creating output_thread and worker_threads below */
+
+ if (myid == 0) {
+ Inbuffer_set_master(inbuffer,master);
+
+ if (output_file != NULL) {
+ fprintf(stderr,"Starting alignment. Writing results to %s\n",output_file);
+ } else if (split_output_root != NULL) {
+ fprintf(stderr,"Starting alignment. Writing results to %s.*\n",split_output_root);
+ } else {
+ fprintf(stderr,"Starting alignment\n");
+ }
+ stopwatch = Stopwatch_new();
+ Stopwatch_start(stopwatch);
+ }
+
+#else
+ worker_setup(genomesubdir,fileroot);
+ FREE(genomesubdir);
+ FREE(fileroot);
+
+ outbuffer = Outbuffer_new(output_buffer_size,nread);
+ Inbuffer_set_outbuffer(inbuffer,outbuffer);
+
+ if (output_file != NULL) {
+ fprintf(stderr,"Starting alignment. Writing results to %s\n",output_file);
+ } else if (split_output_root != NULL) {
+ fprintf(stderr,"Starting alignment. Writing results to %s.*\n",split_output_root);
+ } else {
+ fprintf(stderr,"Starting alignment\n");
+ }
+ stopwatch = Stopwatch_new();
+ Stopwatch_start(stopwatch);
+#endif
+
+
+
+#if !defined(HAVE_PTHREAD)
+ /* Serial version */
+ single_thread();
+
+#else
+ /* Pthreads version */
+ if (nthreads == 0) {
+ single_thread();
+
+ } else if (multiple_sequences_p == false) {
+ single_thread();
+
+ } else {
+ pthread_attr_init(&thread_attr_join);
+ if ((ret = pthread_attr_setdetachstate(&thread_attr_join,PTHREAD_CREATE_JOINABLE)) != 0) {
+ fprintf(stderr,"ERROR: pthread_attr_setdetachstate returned %d\n",ret);
+ exit(1);
+ }
+
+#ifdef USE_MPI
+ /* Master rank that is working or a Slave rank */
+ if (myid == 0) {
+ if (split_output_root == NULL && output_file == NULL) {
+ pthread_create(&write_stdout_thread_id,&thread_attr_detach,Master_write_stdout,(void *) NULL);
+ }
+ pthread_create(&parser_thread_id,&thread_attr_detach,Master_parser,(void *) master);
+ pthread_create(&mpi_interface_thread_id,&thread_attr_join,Master_mpi_interface,(void *) master);
+ }
+#endif
+
+ worker_thread_ids = (pthread_t *) CALLOC(nthreads,sizeof(pthread_t));
+ Except_init_pthread();
+ pthread_key_create(&global_request_key,NULL);
+
+ if (orderedp == true) {
+ pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_ordered,
+ (void *) outbuffer);
+ } else {
+ pthread_create(&output_thread_id,&thread_attr_join,Outbuffer_thread_anyorder,
+ (void *) outbuffer);
+ }
+
+ for (worker_id = 0; worker_id < nthreads; worker_id++) {
+ /* Need to have worker threads finish before we call Inbuffer_free() */
+ pthread_create(&(worker_thread_ids[worker_id]),&thread_attr_join,worker_thread,(void *) worker_id);
+ }
+
+ pthread_join(output_thread_id,NULL);
+ for (worker_id = 0; worker_id < nthreads; worker_id++) {
+ pthread_join(worker_thread_ids[worker_id],NULL);
+ }
+#ifdef USE_MPI
+ if (myid == 0) {
+ pthread_join(mpi_interface_thread_id,NULL);
+ }
+#endif
+
+ pthread_key_delete(global_request_key);
+ /* Do not delete global_except_key, because worker threads might still need it */
+ /* Except_term_pthread(); */
+
+ FREE(worker_thread_ids);
+
+ }
+#endif /* HAVE_PTHREAD */
+
+#ifdef USE_MPI
+ /* MPI worker ranks finished with creating output_thread and worker_threads above */
+ }
+#endif
+
+
+ /* Note: Shortread and Sequence procedures should close their own input files */
+#ifdef USE_MPI
+ if (myid == 0) {
+ runtime = Stopwatch_stop(stopwatch);
+ Stopwatch_free(&stopwatch);
+
+ nread = Master_ntotal(master);
+ fprintf(stderr,"Processed %u queries in %.2f seconds (%.2f queries/sec)\n",
+ nread,runtime,(double) nread/runtime);
+ /* Master_free(&master); -- Master_parser thread still needs this */
+ }
+
+ if (myid > 0 || master_is_worker_p) {
+ Outbuffer_free(&outbuffer);
+ Inbuffer_free(&inbuffer);
+ }
+
+ Outbuffer_close_files(); /* All ranks have to close the files */
+
+#else
+ /* Single CPU or Pthreads version */
+ runtime = Stopwatch_stop(stopwatch);
+ Stopwatch_free(&stopwatch);
+
+ nread = Outbuffer_nread(outbuffer);
+ /* nbeyond = Outbuffer_nbeyond(outbuffer); */
+ fprintf(stderr,"Processed %u queries in %.2f seconds (%.2f queries/sec)\n",
+ nread,runtime,(double) nread/runtime);
+
+ Outbuffer_free(&outbuffer);
+ Inbuffer_free(&inbuffer);
+
+ Outbuffer_close_files();
+#endif
+
+ Outbuffer_cleanup();
+
+#ifdef USE_MPI
+ if (myid > 0 || master_is_worker_p == true) {
+ worker_cleanup();
+ MPI_Comm_free(&workers_comm);
+ }
+ MPI_Barrier(MPI_COMM_WORLD); /* Make sure all processes have cleaned up */
+ MPI_Finalize();
+#else
+ worker_cleanup();
+#endif
+
return 0;
}
@@ -3238,7 +3812,7 @@ Usage: gsnap [OPTIONS...] <FASTA file>, or\n\
(default %d)\n\
",barcode_length);
fprintf(stdout,"\
- -o, --orientation=STRING Orientation of paired-end reads\n\
+ --orientation=STRING Orientation of paired-end reads\n\
Allowed values: FR (fwd-rev, or typical Illumina; default),\n\
RF (rev-fwd, for circularized inserts), or FF (fwd-fwd, same strand)\n\
--fastq-id-start=INT Starting position of identifier in FASTQ header, space-delimited (>= 1)\n\
@@ -3290,9 +3864,9 @@ is still designed to be fast.\n\
Mode Offsets Positions Genome Suffix array\n\
0 see note mmap mmap mmap\n\
1 see note mmap & preload mmap mmap\n\
- (default) 2 see note mmap & preload mmap & preload mmap & preload\n\
+ 2 see note mmap & preload mmap & preload mmap & preload\n\
3 see note allocate mmap & preload mmap & preload\n\
- 4 see note allocate allocate mmap & preload\n\
+ (default) 4 see note allocate allocate mmap & preload\n\
5 see note allocate allocate allocate\n\
Note: For a single sequence, all data structures use mmap\n\
If mmap not available and allocate not chosen, then will use fileio (very slow)\n\
@@ -3301,7 +3875,7 @@ is still designed to be fast.\n\
fprintf(stdout,"\
-B, --batch=INT Batch mode (default = 5, modes 0-4 disallowed because program configured without mmap)\n\
Mode Offsets Positions Genome Suffix array\n\
- (default) 5 see note allocate allocate allocate\n \
+ (default) 5 see note allocate allocate allocate\n\
");
#endif
fprintf(stdout,"\
@@ -3309,6 +3883,8 @@ is still designed to be fast.\n\
independently by the --expand-offsets flag. However, offsets\n\
are accessed relatively fast in this version of GSNAP.\n\
\n\
+ --use-shared-memory=INT If 1 (default), then allocated memory is shared among all processes\n\
+ on this node. If 0, then each process has private allocated memory\n\
--expand-offsets=INT Whether to expand the genomic offsets index\n\
Values: 0 (no, default), or 1 (yes).\n\
Expansion gives faster alignment, but requires more memory\n\
@@ -3436,15 +4012,21 @@ is still designed to be fast.\n\
--mode=STRING Alignment mode: standard (default), cmet-stranded, cmet-nonstranded,\n\
atoi-stranded, or atoi-nonstranded. Non-standard modes requires you\n\
to have previously run the cmetindex or atoiindex programs on the genome\n\
- --tallydir=STRING Directory for tally IIT file to resolve concordant multiple results (default is\n\
+");
+
+
+#if 0
+ fprintf(stdout,"\
+ --tallydir=STRING Directory for tally IIT file to resolve concordant multiple alignments (default is\n\
location of genome index files specified using -D and -d). Note: can\n\
just give full path name to --use-tally instead.\n\
- --use-tally=STRING Use this tally IIT file to resolve concordant multiple results\n\
- --runlengthdir=STRING Directory for runlength IIT file to resolve concordant multiple results (default is\n\
+ --use-tally=STRING Use this tally IIT file to resolve concordant multiple alignments\n\
+ --runlengthdir=STRING Directory for runlength IIT file to resolve concordant multiple alignments (default is\n\
location of genome index files specified using -D and -d). Note: can\n\
just give full path name to --use-runlength instead.\n\
- --use-runlength=STRING Use this runlength IIT file to resolve concordant multiple results\n\
+ --use-runlength=STRING Use this runlength IIT file to resolve concordant multiple alignments\n\
");
+#endif
#if 0
@@ -3516,6 +4098,15 @@ is still designed to be fast.\n\
/* Splicing options */
+ fprintf(stdout,"Splicing options for DNA-Seq\n");
+ fprintf(stdout,"\
+ --find-dna-chimeras=INT Look for distant splicing in DNA-Seq data (0=no (default), 1=yes)\n\
+ Automatically inactivated for RNA-Seq data\n\
+ if -N or -s are specified)\n\
+");
+ fprintf(stdout,"\n");
+
+ /* Splicing options */
fprintf(stdout,"Splicing options for RNA-Seq\n");
fprintf(stdout,"\
-N, --novelsplicing=INT Look for novel splicing (0=no (default), 1=yes)\n\
@@ -3584,12 +4175,13 @@ is still designed to be fast.\n\
Should probably match the value for -w, --localsplicedist.\n\
",pairmax_rna);
fprintf(stdout,"\
- --pairexpect=INT Expected paired-end length, previously used for calling splices in medial part\n\
- of paired-end reads (default %d). Currently not used.\n\
+ --pairexpect=INT Expected paired-end length, used for calling splices in medial part\n\
+ of paired-end reads (default %d). Was turned off in previous versions, but reinstated.\n\
",expected_pairlength);
fprintf(stdout,"\
- --pairdev=INT Allowable deviation from expected paired-end length, previously used for\n\
- calling splices in medial part of paired-end reads (default %d). Currently not used.\n\
+ --pairdev=INT Allowable deviation from expected paired-end length, used for\n\
+ calling splices in medial part of paired-end reads (default %d).\n\
+ Was turned off in previous versions, but reinstated.\n\
",pairlength_deviation);
fprintf(stdout,"\n");
@@ -3632,24 +4224,16 @@ is still designed to be fast.\n\
--nofails Exclude printing of failed alignments\n\
");
-#ifdef HAVE_GOBY
- fprintf(stdout,"\
- -A, --format=STRING Another format type, other than default.\n\
- Currently implemented: sam, goby\n\
-");
-#else
fprintf(stdout,"\
-A, --format=STRING Another format type, other than default.\n\
Currently implemented: sam, m8 (BLAST tabular format)\n\
- Also allowed, but not installed at compile-time: goby\n\
- (To install, need to re-compile with appropriate options)\n\
");
-#endif
fprintf(stdout,"\
--split-output=STRING Basename for multiple-file output, separately for nomapping,\n\
halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult,\n\
paired_uniq, paired_mult, concordant_uniq, and concordant_mult results\n\
+ -o, --output-file=STRING File name for a single stream of output results.\n\
--failed-input=STRING Print completely failed alignments as input FASTA or FASTQ format,\n\
to the given file, appending .1 or .2, for paired-end data.\n\
If the --split-output flag is also given, this file is generated\n\
@@ -3685,7 +4269,7 @@ is still designed to be fast.\n\
differ from reference but match a known alternate allele\n\
--extend-soft-clips Extends alignments through soft clipped regions\n\
--action-if-cigar-error Action to take if there is a disagreement between CIGAR length and sequence length\n\
- Allowed values: ignore, warning (default), abort\n\
+ Allowed values: ignore, warning, noprint (default), abort\n\
--read-group-id=STRING Value to put into read-group id (RG-ID) field\n\
--read-group-name=STRING Value to put into read-group name (RG-SM) field\n\
--read-group-library=STRING Value to put into read-group library (RG-LB) field\n\
@@ -3693,19 +4277,6 @@ is still designed to be fast.\n\
");
fprintf(stdout,"\n");
-
-#ifdef HAVE_GOBY
- /* Goby options */
- fprintf(stdout,"Options for Goby library\n");
- fprintf(stdout,"\
- --goby-output=STRING Basename for Goby output files\n\
- --creads-window-start=INT Compact reads window start (default: 0=start of file)\n\
- --creads-window-end=INT Compact reads window end (default: 0=end of file)\n\
- --creads-complement Complement read sequences (without reversing)\n\
-");
- fprintf(stdout,"\n");
-#endif
-
/* Help options */
fprintf(stdout,"Help options\n");
fprintf(stdout,"\
diff --git a/src/iit-read-univ.c b/src/iit-read-univ.c
index 1f1ca44..3cef0d9 100644
--- a/src/iit-read-univ.c
+++ b/src/iit-read-univ.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read-univ.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: iit-read-univ.c 161940 2015-03-25 20:36:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -223,7 +223,7 @@ Univ_IIT_genomelength (T chromosome_iit, bool with_circular_alias_p) {
bool *
-Univ_IIT_circularp (T chromosome_iit) {
+Univ_IIT_circularp (bool *any_circular_p, T chromosome_iit) {
bool *circularp;
Univinterval_T interval;
int chrnum, nchromosomes;
@@ -232,11 +232,13 @@ Univ_IIT_circularp (T chromosome_iit) {
nchromosomes = chromosome_iit->total_nintervals;
circularp = (bool *) CALLOC(nchromosomes+1,sizeof(bool));
+ *any_circular_p = false;
circularp[0] = false; /* chrnum of 0 indicates translocation */
if ((circular_typeint = Univ_IIT_typeint(chromosome_iit,"circular")) >= 0) {
for (chrnum = 0; chrnum < nchromosomes; chrnum++) {
interval = &(chromosome_iit->intervals[chrnum]);
if (Univinterval_type(interval) == circular_typeint) {
+ *any_circular_p = true;
circularp[chrnum+1] = true;
}
}
@@ -582,6 +584,128 @@ Univ_IIT_dump_fai (T this) {
}
+#ifdef USE_MPI
+/* For chromosome.iit file, which is stored in version 1.  Writes one @SQ line per interval, then an optional @RG line, to the shared MPI file fp. */
+void
+Univ_IIT_dump_sam (MPI_File fp, T this, char *sam_read_group_id, char *sam_read_group_name,
+ char *sam_read_group_library, char *sam_read_group_platform) {
+ int index = 0, i;
+ Univinterval_T interval;
+ Chrpos_T interval_length;
+ char *label, buffer[20];
+ bool allocp;
+ int circular_typeint;
+
+ if (this == NULL) {
+ return; /* no IIT: nothing is written */
+ } else {
+ circular_typeint = Univ_IIT_typeint(this,"circular");
+ }
+
+ for (i = 0; i < this->total_nintervals; i++) {
+ interval = &(this->intervals[i]);
+ label = Univ_IIT_label(this,index+1,&allocp); /* index runs from 0, so the lookup is passed index+1 */
+ MPI_File_write_shared(fp,"@SQ\tSN:",strlen("@SQ\tSN:"),MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,label,strlen(label),MPI_CHAR,MPI_STATUS_IGNORE);
+ if (allocp == true) {
+ FREE(label); /* free only when Univ_IIT_label reported an allocation */
+ }
+ /* startpos = Univinterval_low(interval); */
+ /* endpos = startpos + Univinterval_length(interval) - 1U; */
+
+ interval_length = Univinterval_length(interval);
+ sprintf(buffer,"\tLN:%u",interval_length); /* tag plus digits; 4 + 10 + NUL fits buffer[20], assuming Chrpos_T is at most 32 bits as %u implies */
+ MPI_File_write_shared(fp,buffer,strlen(buffer),MPI_CHAR,MPI_STATUS_IGNORE); /* MPI writes raw bytes and does no printf expansion, so format first */
+ if (Univinterval_type(interval) == circular_typeint) {
+ MPI_File_write_shared(fp,"\ttp:circular",strlen("\ttp:circular"),MPI_CHAR,MPI_STATUS_IGNORE);
+ }
+ MPI_File_write_shared(fp,"\n",1,MPI_CHAR,MPI_STATUS_IGNORE);
+
+ index++;
+ }
+
+ if (sam_read_group_id != NULL) {
+ MPI_File_write_shared(fp,"@RG\tID:",strlen("@RG\tID:"),MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,sam_read_group_id,strlen(sam_read_group_id),MPI_CHAR,MPI_STATUS_IGNORE);
+
+ if (sam_read_group_platform != NULL) {
+ MPI_File_write_shared(fp,"\tPL:",strlen("\tPL:"),MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,sam_read_group_platform,strlen(sam_read_group_platform),MPI_CHAR,MPI_STATUS_IGNORE);
+ }
+ if (sam_read_group_library != NULL) {
+ MPI_File_write_shared(fp,"\tLB:",strlen("\tLB:"),MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,sam_read_group_library,strlen(sam_read_group_library),MPI_CHAR,MPI_STATUS_IGNORE);
+ }
+ MPI_File_write_shared(fp,"\tSM:",strlen("\tSM:"),MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,sam_read_group_name,strlen(sam_read_group_name),MPI_CHAR,MPI_STATUS_IGNORE); /* NOTE(review): sam_read_group_name is not NULL-checked -- confirm callers always set it when an id is given */
+ MPI_File_write_shared(fp,"\n",1,MPI_CHAR,MPI_STATUS_IGNORE);
+ }
+
+ return;
+}
+
+
+int
+Univ_IIT_reserve_sam (T this, char *sam_read_group_id, char *sam_read_group_name,
+ char *sam_read_group_library, char *sam_read_group_platform) {
+ int nchars = 0; /* running total of characters; adds up the same pieces, in the same order, as the MPI Univ_IIT_dump_sam emits */
+ int index = 0, i;
+ Univinterval_T interval;
+ Chrpos_T interval_length;
+ char *label, buffer[20];
+ bool allocp;
+ int circular_typeint;
+
+ if (this == NULL) {
+ return 0; /* no IIT: zero characters */
+ } else {
+ circular_typeint = Univ_IIT_typeint(this,"circular");
+ }
+
+ for (i = 0; i < this->total_nintervals; i++) {
+ interval = &(this->intervals[i]);
+ label = Univ_IIT_label(this,index+1,&allocp); /* index runs from 0, so the lookup is passed index+1 */
+ nchars += strlen("@SQ\tSN:");
+ nchars += strlen(label);
+ if (allocp == true) {
+ FREE(label); /* free only when Univ_IIT_label reported an allocation */
+ }
+ /* startpos = Univinterval_low(interval); */
+ /* endpos = startpos + Univinterval_length(interval) - 1U; */
+
+ interval_length = Univinterval_length(interval);
+ sprintf(buffer,"%u",interval_length); /* formatted only to learn how many digits the length needs */
+ nchars += strlen("\tLN:")+strlen(buffer);
+ if (Univinterval_type(interval) == circular_typeint) {
+ nchars += strlen("\ttp:circular");
+ }
+ nchars += strlen("\n");
+
+ index++;
+ }
+
+ if (sam_read_group_id != NULL) { /* the @RG line is counted only when an id is given */
+ nchars += strlen("@RG\tID:");
+ nchars += strlen(sam_read_group_id);
+
+ if (sam_read_group_platform != NULL) {
+ nchars += strlen("\tPL:");
+ nchars += strlen(sam_read_group_platform);
+ }
+ if (sam_read_group_library != NULL) {
+ nchars += strlen("\tLB:");
+ nchars += strlen(sam_read_group_library);
+ }
+ nchars += strlen("\tSM:");
+ nchars += strlen(sam_read_group_name); /* NOTE(review): not NULL-checked -- confirm callers always set a name when an id is given */
+ nchars += strlen("\n");
+ }
+
+ return nchars;
+}
+
+
+#else
/* For chromosome.iit file, which is stored in version 1 */
void
Univ_IIT_dump_sam (FILE *fp, T this, char *sam_read_group_id, char *sam_read_group_name,
@@ -631,6 +755,8 @@ Univ_IIT_dump_sam (FILE *fp, T this, char *sam_read_group_id, char *sam_read_gro
return;
}
+#endif
+
Chrpos_T *
@@ -775,7 +901,10 @@ Univ_IIT_free (T *old) {
FREE((*old)->labelorder);
/* close((*old)->fd); -- closed in read_annotations */
- } else if ((*old)->access == ALLOCATED) {
+ } else if ((*old)->access == ALLOCATED_PRIVATE) {
+ /* Nothing to close. IIT must have been created by Univ_IIT_new. */
+
+ } else if ((*old)->access == ALLOCATED_SHARED) {
/* Nothing to close. IIT must have been created by Univ_IIT_new. */
} else {
diff --git a/src/iit-read-univ.h b/src/iit-read-univ.h
index f188530..29e30ac 100644
--- a/src/iit-read-univ.h
+++ b/src/iit-read-univ.h
@@ -1,9 +1,13 @@
-/* $Id: iit-read-univ.h 149319 2014-09-30 02:15:42Z twu $ */
+/* $Id: iit-read-univ.h 157228 2015-01-22 18:49:11Z twu $ */
#ifndef IIT_READ_UNIV_INCLUDED
#define IIT_READ_UNIV_INCLUDED
typedef struct Univ_IIT_T *Univ_IIT_T;
+#ifdef USE_MPI
+#include <mpi.h>
+#endif
+
#include <stdio.h>
#include "bool.h"
#include "uintlist.h"
@@ -25,7 +29,7 @@ Univ_IIT_length (T this, int index);
extern Univcoord_T
Univ_IIT_genomelength (T chromosome_iit, bool with_circular_alias_p);
extern bool *
-Univ_IIT_circularp (T chromosome_iit);
+Univ_IIT_circularp (bool *any_circular_p, T chromosome_iit);
extern Univinterval_T
Univ_IIT_interval (T this, int index);
extern Univcoord_T
@@ -64,9 +68,23 @@ extern void
Univ_IIT_dump_table (T this, bool zerobasedp);
extern void
Univ_IIT_dump_fai (T this);
+
extern void
-Univ_IIT_dump_sam (FILE *fp, T this, char *sam_read_group_id, char *sam_read_group_name,
+Univ_IIT_dump_sam (
+#ifdef USE_MPI
+ MPI_File fp,
+#else
+ FILE *fp,
+#endif
+ T this, char *sam_read_group_id, char *sam_read_group_name,
char *sam_read_group_library, char *sam_read_group_platform);
+
+#ifdef USE_MPI
+extern int
+Univ_IIT_reserve_sam (T this, char *sam_read_group_id, char *sam_read_group_name,
+ char *sam_read_group_library, char *sam_read_group_platform);
+#endif
+
extern Chrpos_T *
Univ_IIT_chrlengths (T this);
extern void
diff --git a/src/iit-read.c b/src/iit-read.c
index 3fdebda..6194e60 100644
--- a/src/iit-read.c
+++ b/src/iit-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read.c 164704 2015-05-01 20:24:48Z twu $";
+static char rcsid[] = "$Id: iit-read.c 164702 2015-05-01 20:22:25Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1413,7 +1413,10 @@ IIT_free (T *old) {
FREE((*old)->valueorder);
}
- } else if ((*old)->access == ALLOCATED) {
+ } else if ((*old)->access == ALLOCATED_PRIVATE) {
+ /* Nothing to close. IIT must have been created by IIT_new. */
+
+ } else if ((*old)->access == ALLOCATED_SHARED) {
/* Nothing to close. IIT must have been created by IIT_new. */
} else {
@@ -5919,7 +5922,7 @@ IIT_typelist (T this) {
/* Assume 0-based index */
static void
-print_header (FILE *fp, T this, int recno, char *chr, bool map_bothstrands_p,
+print_header (Filestring_T fp, T this, int recno, char *chr, bool map_bothstrands_p,
bool relativep, Chrpos_T left, bool print_comment_p) {
char *string, *restofheader, *p;
Interval_T interval;
@@ -5930,36 +5933,36 @@ print_header (FILE *fp, T this, int recno, char *chr, bool map_bothstrands_p,
string = IIT_label(this,recno+1,&allocp);
- fprintf(fp,"\t%s",this->name);
+ FPRINTF(fp,"\t%s",this->name);
interval = &(this->intervals[0][recno]);
if (relativep == true) {
if (Interval_sign(interval) >= 0) {
- fprintf(fp,"\t%u..%u",Interval_low(interval)-left,Interval_high(interval)-left);
+ FPRINTF(fp,"\t%u..%u",Interval_low(interval)-left,Interval_high(interval)-left);
} else {
- fprintf(fp,"\t%u..%u",Interval_high(interval)-left,Interval_low(interval)-left);
+ FPRINTF(fp,"\t%u..%u",Interval_high(interval)-left,Interval_low(interval)-left);
}
} else {
if (Interval_sign(interval) >= 0) {
- fprintf(fp,"\t%s:%u..%u",chr,Interval_low(interval),Interval_high(interval));
+ FPRINTF(fp,"\t%s:%u..%u",chr,Interval_low(interval),Interval_high(interval));
} else {
- fprintf(fp,"\t%s:%u..%u",chr,Interval_high(interval),Interval_low(interval));
+ FPRINTF(fp,"\t%s:%u..%u",chr,Interval_high(interval),Interval_low(interval));
}
}
#if 0
if (map_bothstrands_p == true) {
if ((typeint = Interval_type(interval)) <= 0) {
- fprintf(fp,"\t\t%s",string);
+ FPRINTF(fp,"\t\t%s",string);
} else {
- fprintf(fp,"\t%s\t%s",IIT_typestring(this,typeint),string);
+ FPRINTF(fp,"\t%s\t%s",IIT_typestring(this,typeint),string);
}
} else {
#endif
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
p = string;
while (*p != '\0' && *p != '\n') {
- putc(*p,fp);
+ PUTC(*p,fp);
p++;
}
@@ -5973,9 +5976,9 @@ print_header (FILE *fp, T this, int recno, char *chr, bool map_bothstrands_p,
if (print_comment_p == true) {
p = IIT_annotation(&restofheader,this,recno+1,&allocp);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
while (*p != '\0' && *p != '\n') {
- putc(*p,fp);
+ PUTC(*p,fp);
p++;
}
@@ -5984,14 +5987,14 @@ print_header (FILE *fp, T this, int recno, char *chr, bool map_bothstrands_p,
}
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
void
-IIT_print_header (FILE *fp, T this, int *matches, int nmatches, bool map_bothstrands_p,
+IIT_print_header (Filestring_T fp, T this, int *matches, int nmatches, bool map_bothstrands_p,
char *chr, bool reversep, bool relativep, Chrpos_T left,
bool print_comment_p) {
int recno, i;
diff --git a/src/iit-read.h b/src/iit-read.h
index e2a622b..208fc7b 100644
--- a/src/iit-read.h
+++ b/src/iit-read.h
@@ -1,7 +1,6 @@
-/* $Id: iit-read.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: iit-read.h 157225 2015-01-22 18:47:23Z twu $ */
#ifndef IIT_READ_INCLUDED
#define IIT_READ_INCLUDED
-
#ifdef HAVE_CONFIG_H
#include <config.h> /* For HAVE_64_BIT */
#endif
@@ -13,6 +12,7 @@
#include "interval.h"
#include "types.h"
#include "iitdef.h"
+#include "filestring.h"
typedef enum {READ_ALL, READ_ONE, READ_NONE} Divread_T;
@@ -21,6 +21,7 @@ typedef enum {READ_ALL, READ_ONE, READ_NONE} Divread_T;
typedef enum {NO_KNOWN_GENE, KNOWN_GENE, KNOWN_GENE_MULTIEXON} Overlap_T;
+
#define T IIT_T
extern bool
@@ -213,7 +214,7 @@ extern List_T
IIT_typelist (T this);
extern void
-IIT_print_header (FILE *fp, T this, int *matches, int nmatches, bool map_bothstrands_p,
+IIT_print_header (Filestring_T fp, T this, int *matches, int nmatches, bool map_bothstrands_p,
char *chr, bool reversep, bool relativep, Chrpos_T left, bool print_comment_p);
extern Overlap_T
diff --git a/src/iit-write-univ.h b/src/iit-write-univ.h
index ae5fb1f..031a579 100644
--- a/src/iit-write-univ.h
+++ b/src/iit-write-univ.h
@@ -1,6 +1,7 @@
-/* $Id: iit-write-univ.h 132144 2014-04-02 16:02:28Z twu $ */
+/* $Id: iit-write-univ.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef IIT_WRITE_UNIV_INCLUDED
#define IIT_WRITE_UNIV_INCLUDED
+
#include "bool.h"
#include "list.h"
#include "uintlist.h"
diff --git a/src/iit-write.h b/src/iit-write.h
index 486245d..a974ad4 100644
--- a/src/iit-write.h
+++ b/src/iit-write.h
@@ -1,6 +1,7 @@
-/* $Id: iit-write.h 132144 2014-04-02 16:02:28Z twu $ */
+/* $Id: iit-write.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef IIT_WRITE_INCLUDED
#define IIT_WRITE_INCLUDED
+
#include "bool.h"
#include "list.h"
#include "uintlist.h"
diff --git a/src/iitdef.h b/src/iitdef.h
index 1a2bc67..f18fd10 100644
--- a/src/iitdef.h
+++ b/src/iitdef.h
@@ -1,8 +1,8 @@
-/* $Id: iitdef.h 138717 2014-06-11 17:06:45Z twu $ */
+/* $Id: iitdef.h 157223 2015-01-22 18:43:01Z twu $ */
#ifndef IITDEF_INCLUDED
#define IITDEF_INCLUDED
#ifdef HAVE_CONFIG_H
-#include <config.h>
+#include <config.h> /* For HAVE_SYS_TYPES_H, HAVE_PTHREAD */
#endif
#ifdef HAVE_SYS_TYPES_H
diff --git a/src/inbuffer.c b/src/inbuffer.c
index f3a09e5..92f2390 100644
--- a/src/inbuffer.c
+++ b/src/inbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: inbuffer.c 101822 2013-07-17 18:43:45Z twu $";
+static char rcsid[] = "$Id: inbuffer.c 160102 2015-03-03 21:04:01Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -16,6 +16,9 @@ static char rcsid[] = "$Id: inbuffer.c 101822 2013-07-17 18:43:45Z twu $";
#include "mem.h"
+#ifdef USE_MPI
+#include "filestring.h"
+#endif
#ifdef GSNAP
#include "shortread.h"
#endif
@@ -28,16 +31,83 @@ static char rcsid[] = "$Id: inbuffer.c 101822 2013-07-17 18:43:45Z twu $";
#endif
+static bool filter_if_both_p;
+
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+static MPI_Comm workers_comm;
+#endif
+
+#ifndef GSNAP
+static bool user_pairalign_p;
+static Sequence_T global_usersegment;
+#endif
+
+static int part_modulus;
+static int part_interval;
+
+void
+Inbuffer_setup (bool filter_if_both_p_in,
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ MPI_Comm workers_comm_in,
+#endif
+#ifndef GSNAP
+ bool user_pairalign_p_in, Sequence_T global_usersegment_in,
+#endif
+
+ int part_modulus_in, int part_interval_in) {
+ filter_if_both_p = filter_if_both_p_in; /* each *_in argument is copied into the file-static variable of the same name */
+
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ workers_comm = workers_comm_in; /* parameter and static exist only in this build configuration */
+#endif
+
+#ifndef GSNAP
+ user_pairalign_p = user_pairalign_p_in; /* these two exist only when GSNAP is not defined */
+ global_usersegment = global_usersegment_in;
+#endif
+
+ part_modulus = part_modulus_in;
+ part_interval = part_interval_in;
+
+ return;
+}
+
+
+
#define T Inbuffer_T
struct T {
+#ifdef USE_MPI
+ Master_T master;
+#endif
Outbuffer_T outbuffer;
- bool filter_if_both_p;
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ MPI_File input;
+#ifdef GSNAP
+ MPI_File input2;
+#endif
+
+#elif (defined(USE_MPI))
+ FILE *input;
+#ifdef GSNAP
+ FILE *input2;
+#endif
+
+#else
FILE *input;
#ifdef GSNAP
FILE *input2;
#endif
+#endif
+
+#ifdef USE_MPI
+ int myid;
+ char *filecontents1_alloc;
+ char *filecontents1;
+ char *filecontents2_alloc;
+ char *filecontents2;
+#endif
#ifdef HAVE_ZLIB
gzFile gzipped;
@@ -59,29 +129,15 @@ struct T {
int nfiles;
int nextchar;
-#ifdef GSNAP
- Gobyreader_T gobyreader;
- bool fastq_format_p;
- bool creads_format_p;
- int barcode_length;
- bool invert_first_p;
- bool invert_second_p;
- bool chop_primers_p;
-#else
- bool maponlyp;
-#endif
-
- int part_modulus;
- int part_interval;
-
- unsigned int nspaces;
- unsigned int maxchars;
-
-#ifdef HAVE_PTHREAD
+#if defined(HAVE_PTHREAD)
pthread_mutex_t lock;
#endif
+#ifndef GSNAP
+ Sequence_T pairalign_segment;
+#endif
Request_T *buffer;
+ unsigned int nspaces;
int ptr;
int nleft;
int inputid;
@@ -94,18 +150,22 @@ T
Inbuffer_cmdline (char *contents, int length) {
T new = (T) MALLOC(sizeof(*new));
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ new->input = (MPI_File) NULL;
+#else
new->input = (FILE *) NULL;
+#endif
+
+#ifdef USE_MPI
+ new->filecontents1_alloc = (char *) NULL;
+ new->filecontents2_alloc = (char *) NULL;
+#endif
+
new->files = (char **) NULL;
new->nfiles = 0;
new->nextchar = '\0';
- new->maponlyp = false;
-
- new->part_modulus = 0;
- new->part_interval = 1;
-
- new->nspaces = 0;
- /* new->maxchars = maxchars; */
+ new->pairalign_segment = (Sequence_T) NULL;
new->buffer = (Request_T *) CALLOC(1,sizeof(Request_T));
new->ptr = 0;
@@ -115,7 +175,7 @@ Inbuffer_cmdline (char *contents, int length) {
new->buffer[0] = Request_new(new->requestid++,Sequence_genomic_new(contents,length,/*copyp*/true));
-#ifdef HAVE_PTHREAD
+#if defined(HAVE_PTHREAD)
pthread_mutex_init(&new->lock,NULL);
#endif
@@ -124,35 +184,37 @@ Inbuffer_cmdline (char *contents, int length) {
#endif
-
T
-Inbuffer_new (int nextchar, FILE *input,
+Inbuffer_new (int nextchar,
+#ifdef USE_MPI
+ int myid,
+#endif
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ MPI_File input,
+#else
+ FILE *input,
+#endif
#ifdef GSNAP
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ MPI_File input2,
+#else
FILE *input2,
+#endif
#ifdef HAVE_ZLIB
gzFile gzipped, gzFile gzipped2,
#endif
#ifdef HAVE_BZLIB
Bzip2_T bzipped, Bzip2_T bzipped2,
#endif
-#ifdef HAVE_GOBY
- Gobyreader_T gobyreader,
-#endif
-#endif
- char **files, int nfiles,
-#ifdef GSNAP
- bool fastq_format_p, bool creads_format_p,
- int barcode_length, bool invert_first_p, bool invert_second_p,
- bool chop_primers_p,
-#else
- bool maponlyp,
#endif
- unsigned int nspaces, unsigned int maxchars, int part_interval, int part_modulus,
- bool filter_if_both_p) {
+ char **files, int nfiles, unsigned int nspaces) {
T new = (T) MALLOC(sizeof(*new));
- new->filter_if_both_p = filter_if_both_p;
+#ifdef USE_MPI
+ new->myid = myid;
+#endif
+
new->input = input;
#ifdef GSNAP
new->input2 = input2;
@@ -170,42 +232,26 @@ Inbuffer_new (int nextchar, FILE *input,
new->bzipped = (void *) NULL;
new->bzipped2 = (void *) NULL;
#endif
-#ifdef HAVE_GOBY
- new->gobyreader = gobyreader;
#endif
+
+#ifdef USE_MPI
+ new->filecontents1_alloc = (char *) NULL;
+ new->filecontents2_alloc = (char *) NULL;
#endif
new->files = files;
new->nfiles = nfiles;
new->nextchar = nextchar;
-#ifdef GSNAP
- new->fastq_format_p = fastq_format_p;
- new->creads_format_p = creads_format_p;
- new->barcode_length = barcode_length;
- new->invert_first_p = invert_first_p;
- new->invert_second_p = invert_second_p;
- new->chop_primers_p = chop_primers_p;
-#if 0
- if (chop_primers_p == true) {
- Shortread_dynprog_init(MAX_READLENGTH);
- }
-#endif
-#else
- new->maponlyp = maponlyp;
-#endif
-
- new->part_modulus = part_modulus;
- new->part_interval = part_interval;
-
- new->nspaces = nspaces;
- new->maxchars = maxchars;
-
-#ifdef HAVE_PTHREAD
+#if defined(HAVE_PTHREAD)
pthread_mutex_init(&new->lock,NULL);
#endif
+#ifndef GSNAP
+ new->pairalign_segment = (Sequence_T) NULL;
+#endif
new->buffer = (Request_T *) CALLOC(nspaces,sizeof(Request_T));
+ new->nspaces = nspaces;
new->ptr = 0;
new->nleft = 0;
new->inputid = 0;
@@ -214,21 +260,33 @@ Inbuffer_new (int nextchar, FILE *input,
return new;
}
+#ifdef USE_MPI
+void
+Inbuffer_set_master (T this, Master_T master) {
+ this->master = master;
+ return;
+}
+#endif
+
void
Inbuffer_set_outbuffer (T this, Outbuffer_T outbuffer) {
this->outbuffer = outbuffer;
return;
}
-
void
Inbuffer_free (T *old) {
if (*old) {
/* No need to close input, since done by Shortread and Sequence read procedures */
+#ifdef USE_MPI
+ FREE_IN((*old)->filecontents1_alloc);
+ FREE_IN((*old)->filecontents2_alloc);
+#endif
+
FREE((*old)->buffer);
-#ifdef HAVE_PTHREAD
+#if defined(HAVE_PTHREAD)
pthread_mutex_destroy(&(*old)->lock);
#endif
@@ -238,324 +296,459 @@ Inbuffer_free (T *old) {
}
-#ifdef GSNAP
+#ifndef GSNAP
+/* Can delete when we remove worker_mpi_process from gmap.c */
+Sequence_T
+Inbuffer_read (Sequence_T *pairalign_segment, T this, bool skipp) {
+ Sequence_T queryseq;
+
+ queryseq = Sequence_read_multifile(&this->nextchar,&this->input,&this->files,&this->nfiles);
+ if (skipp == true) {
+ Sequence_free(&queryseq);
+ }
+
+ if (user_pairalign_p == true) {
+ /* assert(this->nspaces == 1) */
+ if (this->pairalign_segment != NULL) {
+ Sequence_free(&this->pairalign_segment);
+ }
+ this->pairalign_segment = Sequence_read_unlimited(&this->nextchar,stdin);
+ debug(printf(" but first reading usersegment, got nextchar %c\n",this->nextchar));
+ }
+
+ this->inputid++;
+
+ *pairalign_segment = this->pairalign_segment;
+ return queryseq;
+}
+#endif
+
+
+#ifdef USE_MPI
+/* Used by rank 0 to communicate with Master_parser thread of rank 0 */
/* Returns number of requests read */
static unsigned int
-fill_buffer (T this) {
+fill_buffer_master (T this) {
unsigned int nread = 0;
- unsigned int nchars = 0U;
Shortread_T queryseq1, queryseq2;
+ Filestring_T filestring1, filestring2;
bool skipp;
+#if defined(USE_MPI_FILE_INPUT)
+ MPI_Status status;
+#endif
- if (this->fastq_format_p == true) {
- if (this->gzipped != NULL) {
-#ifdef HAVE_ZLIB
- /* FASTQ input, gzipped */
- while (nread < this->nspaces &&
+ int strlength1, strlength2;
+ int offset_start_1, offset_end_1, offset_start_2, offset_end_2;
+ int nextchar_end;
+ bool donep;
#if 0
- nchars < this->maxchars &&
+ int nchars1, nchars2; /* Doesn't need to be saved as a field in Inbuffer_T. */
#endif
- (queryseq1 = Shortread_read_fastq_shortreads_gzip(&this->nextchar,&queryseq2,&this->gzipped,&this->gzipped2,
- &this->files,&this->nfiles,skipp = (this->inputid % this->part_interval != this->part_modulus),
- this->barcode_length,this->invert_first_p,this->invert_second_p)) != NULL) {
- if (skipp) {
-#if 0
- /* Shortread procedures won't allocate in this situation */
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-#endif
-
- } else if (this->filter_if_both_p == true &&
- Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else if (this->filter_if_both_p == false &&
- (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else {
- this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
- nchars += Shortread_fulllength(queryseq1);
- if (queryseq2 != NULL) {
- nchars += Shortread_fulllength(queryseq2);
- }
- }
- this->inputid++;
- }
+
+ /* Need to receive nextchar_end because of the difference between
+ filecontents end ('\0') and FILE * end (EOF) */
+
+ debug(fprintf(stdout,"Worker %d: accessing parser thread directly. ",this->myid));
+ Master_self_interface(this->master,&this->nextchar,&nextchar_end,
+ &offset_start_1,&offset_start_2,&offset_end_1,&offset_end_2,
+ &filestring1,&filestring2,&donep);
+
+#if defined(HAVE_ZLIB) && defined(HAVE_BZLIB)
+ if (this->gzipped == NULL && this->bzipped == NULL) {
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d, nextchars %c..%c, donep %d\n",
+ offset_start_1,offset_end_1,offset_start_2,offset_end_2,this->nextchar,nextchar_end,donep));
+
+ FREE_IN(this->filecontents1_alloc);
+ FREE_IN(this->filecontents2_alloc);
+ this->filecontents1 = (char *) NULL;
+ this->filecontents2 = (char *) NULL;
+
+ } else {
+ this->filecontents1 = this->filecontents1_alloc = Filestring_extract(&strlength1,filestring1);
+ this->filecontents2 = this->filecontents2_alloc = Filestring_extract(&strlength2,filestring2);
+ debug(fprintf(stdout,"Received filestrings of length %d and %d\n",strlength1,strlength2));
+ }
+
+#elif defined(HAVE_ZLIB)
+ if (this->gzipped == NULL) {
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d, nextchars %c..%c, donep %d\n",
+ offset_start_1,offset_end_1,offset_start_2,offset_end_2,this->nextchar,nextchar_end,donep));
+
+ FREE_IN(this->filecontents1_alloc);
+ FREE_IN(this->filecontents2_alloc);
+ this->filecontents1 = (char *) NULL;
+ this->filecontents2 = (char *) NULL;
+
+ } else {
+ this->filecontents1 = this->filecontents1_alloc = Filestring_extract(&strlength1,filestring1);
+ this->filecontents2 = this->filecontents2_alloc = Filestring_extract(&strlength2,filestring2);
+ debug(fprintf(stdout,"Received filestrings of length %d and %d\n",strlength1,strlength2));
+ }
+
+#elif defined(HAVE_BZLIB)
+ if (this->bzipped == NULL) {
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d, nextchars %c..%c, donep %d\n",
+ offset_start_1,offset_end_1,offset_start_2,offset_end_2,this->nextchar,nextchar_end,donep));
+
+ FREE_IN(this->filecontents1_alloc);
+ FREE_IN(this->filecontents2_alloc);
+ this->filecontents1 = (char *) NULL;
+ this->filecontents2 = (char *) NULL;
+
+ } else {
+ this->filecontents1 = this->filecontents1_alloc = Filestring_extract(&strlength1,filestring1);
+ this->filecontents2 = this->filecontents2_alloc = Filestring_extract(&strlength2,filestring2);
+ debug(fprintf(stdout,"Received filestrings of length %d and %d\n",strlength1,strlength2));
+ }
+
+#else
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d, nextchars %c..%c, donep %d\n",
+ offset_start_1,offset_end_1,offset_start_2,offset_end_2,this->nextchar,nextchar_end,donep));
+
+ FREE_IN(this->filecontents1_alloc);
+ FREE_IN(this->filecontents2_alloc);
+ this->filecontents1 = (char *) NULL;
+ this->filecontents2 = (char *) NULL;
#endif
- } else if (this->bzipped != NULL) {
-#ifdef HAVE_BZLIB
- /* FASTQ input, bzip2-compressed */
- while (nread < this->nspaces &&
-#if 0
- nchars < this->maxchars &&
+ Filestring_free(&filestring2);
+ Filestring_free(&filestring1);
+
+
+ if (this->filecontents1 == NULL) {
+#if defined(USE_MPI_FILE_INPUT)
+ MPI_File_seek(this->input,offset_start_1,MPI_SEEK_SET);
+ this->filecontents1 = this->filecontents1_alloc = (char *) MALLOC_IN((offset_end_1 - offset_start_1 + 1) * sizeof(char));
+ MPI_File_read(this->input,this->filecontents1,offset_end_1 - offset_start_1,MPI_CHAR,&status);
+ this->filecontents1[offset_end_1 - offset_start_1] = '\0';
+
+ if (this->input2 != NULL) {
+ MPI_File_seek(this->input2,offset_start_2,MPI_SEEK_SET);
+ this->filecontents2 = this->filecontents2_alloc = (char *) MALLOC_IN((offset_end_2 - offset_start_2 + 1) * sizeof(char));
+ MPI_File_read(this->input2,this->filecontents2,offset_end_2 - offset_start_2,MPI_CHAR,&status);
+ this->filecontents2[offset_end_2 - offset_start_2] = '\0';
+ }
+
+#else
+#ifdef HAVE_FSEEKO
+ fseeko(this->input,offset_start_1,SEEK_SET);
+#else
+ fseek(this->input,offset_start_1,SEEK_SET);
+#endif
+ this->filecontents1 = this->filecontents1_alloc = (char *) MALLOC_IN((offset_end_1 - offset_start_1 + 1) * sizeof(char));
+ fread(this->filecontents1,offset_end_1 - offset_start_1,sizeof(char),this->input);
+ this->filecontents1[offset_end_1 - offset_start_1] = '\0';
+ if (this->input2 != NULL) {
+#ifdef HAVE_FSEEKO
+ fseeko(this->input2,offset_start_2,SEEK_SET);
+#else
+ fseek(this->input2,offset_start_2,SEEK_SET);
#endif
- (queryseq1 = Shortread_read_fastq_shortreads_bzip2(&this->nextchar,&queryseq2,&this->bzipped,&this->bzipped2,
- &this->files,&this->nfiles,skipp = (this->inputid % this->part_interval != this->part_modulus),
- this->barcode_length,this->invert_first_p,this->invert_second_p)) != NULL) {
- if (skipp) {
-#if 0
- /* Shortread procedures won't allocate in this situation */
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-#endif
-
- } else if (this->filter_if_both_p == true &&
- Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else if (this->filter_if_both_p == false &&
- (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else {
- this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
- nchars += Shortread_fulllength(queryseq1);
- if (queryseq2 != NULL) {
- nchars += Shortread_fulllength(queryseq2);
- }
- }
- this->inputid++;
- }
+ this->filecontents2 = this->filecontents2_alloc = (char *) MALLOC_IN((offset_end_2 - offset_start_2 + 2) * sizeof(char));
+ fread(this->filecontents2,offset_end_2 - offset_start_2,sizeof(char),this->input);
+ this->filecontents2[offset_end_2 - offset_start_2] = '\0';
+ }
#endif
+ }
- } else {
- /* FASTQ input, text */
- while (nread < this->nspaces &&
+ /* Read from filecontents */
+ while (nread < this->nspaces &&
+ (queryseq1 = Shortread_read_filecontents(&this->nextchar,&queryseq2,
+ &this->filecontents1,&this->filecontents2,&this->input,&this->input2,
+#ifdef USE_MPI_FILE_INPUT
+ workers_comm,
+#endif
+ &this->files,&this->nfiles,
+ skipp = (this->inputid % part_interval != part_modulus))) != NULL) {
+ if (skipp) {
#if 0
- nchars < this->maxchars &&
+ /* Shortread procedures won't allocate in this situation */
+ Shortread_free(&queryseq1);
+ if (queryseq2 != NULL) {
+ Shortread_free(&queryseq2);
+ }
#endif
- (queryseq1 = Shortread_read_fastq_shortreads(&this->nextchar,&queryseq2,&this->input,&this->input2,
- &this->files,&this->nfiles,skipp = (this->inputid % this->part_interval != this->part_modulus),
- this->barcode_length,this->invert_first_p,this->invert_second_p)) != NULL) {
- if (skipp) {
-#if 0
- /* Shortread procedures won't allocate in this situation */
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-#endif
-
- } else if (this->filter_if_both_p == true &&
- Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else if (this->filter_if_both_p == false &&
- (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else {
- this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
- nchars += Shortread_fulllength(queryseq1);
- if (queryseq2 != NULL) {
- nchars += Shortread_fulllength(queryseq2);
- }
- }
- this->inputid++;
+
+ } else if (filter_if_both_p == true &&
+ Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
+ Shortread_free(&queryseq1);
+ if (queryseq2 != NULL) {
+ Shortread_free(&queryseq2);
+ }
+
+ } else if (filter_if_both_p == false &&
+ (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
+ Shortread_free(&queryseq1);
+ if (queryseq2 != NULL) {
+ Shortread_free(&queryseq2);
}
+
+ } else {
+ this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
}
+ this->inputid++;
+ }
- } else if (this->creads_format_p == true) {
-#ifdef HAVE_GOBY
- /* GOBY input */
- while (nread < this->nspaces &&
-#if 0
- nchars < this->maxchars &&
+ this->nleft = nread;
+ this->ptr = 0;
+
+ /* Need to set this to the FILE * end (EOF at end of file), and not the filecontents end (always '\0') */
+ this->nextchar = nextchar_end;
+
+#ifdef USE_MPI
+ debug(printf("Worker %d: ",this->myid));
#endif
- (queryseq1 = Goby_read(&queryseq2,this->gobyreader,this->barcode_length,
- this->invert_first_p,this->invert_second_p,
- skipp = (this->inputid % this->part_interval != this->part_modulus))) != NULL) {
- if (skipp) {
+ debug(printf("this->nextchar (nextchar_end) is %c (%d)\n",this->nextchar,this->nextchar));
+
+ return nread;
+}
+
+
+
+/* Used by ranks 1..n to communicate with Master_mpi_interface thread of rank 0 */
+/* Returns number of requests read */
+static unsigned int
+fill_buffer_slave (T this) {
+ unsigned int nread = 0;
+ Shortread_T queryseq1, queryseq2;
+ bool skipp, donep;
+
+ int strlength1, strlength2;
+ MPI_Status status;
+ int offset_start_1, offset_end_1, offset_start_2, offset_end_2;
+ int nextchar_end;
#if 0
- /* Shortread procedures won't allocate in this situation */
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-#endif
-
- } else if (this->filter_if_both_p == true &&
- Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else if (this->filter_if_both_p == false &&
- (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else {
- this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
- nchars += Shortread_fulllength(queryseq1);
- if (queryseq2 != NULL) {
- nchars += Shortread_fulllength(queryseq2);
- }
- }
- this->inputid++;
- }
+ int nchars1, nchars2; /* Doesn't need to be saved as a field in Inbuffer_T. */
#endif
+ /* Need to receive nextchar_end because of the difference between
+ filecontents end ('\0') and FILE * end (EOF) */
+
+ debug(fprintf(stdout,"Worker %d: sending notification to master process. ",this->myid));
+ MPI_SEND(&this->nfiles,1,MPI_INT,/*dest*/0,/*tag*/MPI_TAG_WANT_INPUT,MPI_COMM_WORLD);
+ MPI_RECV(&this->nextchar,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&nextchar_end,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&donep,1,MPI_UNSIGNED_CHAR,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+
+#if defined(HAVE_ZLIB) && defined(HAVE_BZLIB)
+ if (this->gzipped == NULL && this->bzipped == NULL) {
+ MPI_RECV(&offset_start_1,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_start_2,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_end_1,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_end_2,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d, nextchars %c..%c, donep %d\n",
+ offset_start_1,offset_end_1,offset_start_2,offset_end_2,this->nextchar,nextchar_end,donep));
+
+ FREE_IN(this->filecontents1_alloc);
+ FREE_IN(this->filecontents2_alloc);
+ this->filecontents1 = (char *) NULL;
+ this->filecontents2 = (char *) NULL;
+
} else {
- if (this->gzipped != NULL) {
-#ifdef HAVE_ZLIB
- /* FASTA input, gzipped */
- while (nread < this->nspaces &&
-#if 0
- nchars < this->maxchars &&
+ this->filecontents1 = this->filecontents1_alloc =
+ Filestring_recv(&strlength1,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+ this->filecontents2 = this->filecontents2_alloc =
+ Filestring_recv(&strlength2,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+ debug(fprintf(stdout,"Received filestrings of length %d and %d\n",strlength1,strlength2));
+ }
+
+#elif defined(HAVE_ZLIB)
+ if (this->gzipped == NULL) {
+ MPI_RECV(&offset_start_1,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_start_2,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_end_1,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_end_2,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d, nextchars %c..%c, donep %d\n",
+ offset_start_1,offset_end_1,offset_start_2,offset_end_2,this->nextchar,nextchar_end,donep));
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d\n",offset_start_1,offset_end_1,offset_start_2,offset_end_2));
+
+ FREE_IN(this->filecontents1_alloc);
+ FREE_IN(this->filecontents2_alloc);
+ this->filecontents1 = (char *) NULL;
+ this->filecontents2 = (char *) NULL;
+
+ } else {
+ this->filecontents1 = this->filecontents1_alloc =
+ Filestring_recv(&strlength1,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+ this->filecontents2 = this->filecontents2_alloc =
+ Filestring_recv(&strlength2,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+ debug(fprintf(stdout,"Received filestrings of length %d and %d\n",strlength1,strlength2));
+ }
+
+#elif defined(HAVE_BZLIB)
+ if (this->bzipped == NULL) {
+ MPI_RECV(&offset_start_1,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_start_2,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_end_1,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_end_2,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d, nextchars %c..%c, donep %d\n",
+ offset_start_1,offset_end_1,offset_start_2,offset_end_2,this->nextchar,nextchar_end,donep));
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d\n",offset_start_1,offset_end_1,offset_start_2,offset_end_2));
+
+ FREE_IN(this->filecontents1_alloc);
+ FREE_IN(this->filecontents2_alloc);
+ this->filecontents1 = (char *) NULL;
+ this->filecontents2 = (char *) NULL;
+
+ } else {
+ this->filecontents1 = this->filecontents1_alloc =
+ Filestring_recv(&strlength1,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+ this->filecontents2 = this->filecontents2_alloc =
+ Filestring_recv(&strlength2,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+ debug(fprintf(stdout,"Received filestrings of length %d and %d\n",strlength1,strlength2));
+ }
+
+#else
+ MPI_RECV(&offset_start_1,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_start_2,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_end_1,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ MPI_RECV(&offset_end_2,1,MPI_INT,/*source*/0,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD,&status);
+ debug(fprintf(stdout,"Received offsets %d..%d and %d..%d, nextchars %c..%c, donep %d\n",
+ offset_start_1,offset_end_1,offset_start_2,offset_end_2,this->nextchar,nextchar_end,donep));
+
+ FREE_IN(this->filecontents1_alloc);
+ FREE_IN(this->filecontents2_alloc);
+ this->filecontents1 = (char *) NULL;
+ this->filecontents2 = (char *) NULL;
+#endif
+
+
+ if (this->filecontents1 == NULL) {
+#if defined(USE_MPI_FILE_INPUT)
+ MPI_File_seek(this->input,offset_start_1,MPI_SEEK_SET);
+ this->filecontents1 = this->filecontents1_alloc = (char *) MALLOC_IN((offset_end_1 - offset_start_1 + 1) * sizeof(char));
+ MPI_File_read(this->input,this->filecontents1,offset_end_1 - offset_start_1,MPI_CHAR,&status);
+ this->filecontents1[offset_end_1 - offset_start_1] = '\0';
+
+ if (this->input2 != NULL) {
+ MPI_File_seek(this->input2,offset_start_2,MPI_SEEK_SET);
+ this->filecontents2 = this->filecontents2_alloc = (char *) MALLOC_IN((offset_end_2 - offset_start_2 + 1) * sizeof(char));
+ MPI_File_read(this->input2,this->filecontents2,offset_end_2 - offset_start_2,MPI_CHAR,&status);
+ this->filecontents2[offset_end_2 - offset_start_2] = '\0';
+ }
+
+#else
+#ifdef HAVE_FSEEKO
+ fseeko(this->input,offset_start_1,SEEK_SET);
+#else
+ fseek(this->input,offset_start_1,SEEK_SET);
+#endif
+ this->filecontents1 = this->filecontents1_alloc = (char *) MALLOC_IN((offset_end_1 - offset_start_1 + 1) * sizeof(char));
+ fread(this->filecontents1,offset_end_1 - offset_start_1,sizeof(char),this->input);
+ this->filecontents1[offset_end_1 - offset_start_1] = '\0';
+ if (this->input2 != NULL) {
+#ifdef HAVE_FSEEKO
+ fseeko(this->input2,offset_start_2,SEEK_SET);
+#else
+ fseek(this->input2,offset_start_2,SEEK_SET);
#endif
- (queryseq1 = Shortread_read_fasta_shortreads_gzip(&this->nextchar,&queryseq2,&this->gzipped,&this->gzipped2,
- &this->files,&this->nfiles,skipp = (this->inputid % this->part_interval != this->part_modulus),
- this->barcode_length,this->invert_first_p,this->invert_second_p)) != NULL) {
- if (skipp) {
-#if 0
- /* Shortread procedures won't allocate in this situation */
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-#endif
-
- } else if (this->filter_if_both_p == true &&
- Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else if (this->filter_if_both_p == false &&
- (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else {
- this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
- nchars += Shortread_fulllength(queryseq1);
- if (queryseq2 != NULL) {
- nchars += Shortread_fulllength(queryseq2);
- }
- }
- this->inputid++;
- }
+ this->filecontents2 = this->filecontents2_alloc = (char *) MALLOC_IN((offset_end_2 - offset_start_2 + 2) * sizeof(char));
+ fread(this->filecontents2,offset_end_2 - offset_start_2,sizeof(char),this->input);
+ this->filecontents2[offset_end_2 - offset_start_2] = '\0';
+ }
#endif
+ }
- } else if (this->bzipped != NULL) {
-#ifdef HAVE_BZLIB
- /* FASTA input, bzip2-compressed */
- while (nread < this->nspaces &&
-#if 0
- nchars < this->maxchars &&
-#endif
- (queryseq1 = Shortread_read_fasta_shortreads_bzip2(&this->nextchar,&queryseq2,&this->bzipped,&this->bzipped2,
- &this->files,&this->nfiles,skipp = (this->inputid % this->part_interval != this->part_modulus),
- this->barcode_length,this->invert_first_p,this->invert_second_p)) != NULL) {
- if (skipp) {
+ /* Read from filecontents */
+ while (nread < this->nspaces &&
+ (queryseq1 = Shortread_read_filecontents(&this->nextchar,&queryseq2,
+ &this->filecontents1,&this->filecontents2,&this->input,&this->input2,
+#ifdef USE_MPI_FILE_INPUT
+ workers_comm,
+#endif
+ &this->files,&this->nfiles,
+ skipp = (this->inputid % part_interval != part_modulus))) != NULL) {
+ if (skipp) {
#if 0
- /* Shortread procedures won't allocate in this situation */
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-#endif
-
- } else if (this->filter_if_both_p == true &&
- Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else if (this->filter_if_both_p == false &&
- (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else {
- this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
- nchars += Shortread_fulllength(queryseq1);
- if (queryseq2 != NULL) {
- nchars += Shortread_fulllength(queryseq2);
- }
- }
- this->inputid++;
+ /* Shortread procedures won't allocate in this situation */
+ Shortread_free(&queryseq1);
+ if (queryseq2 != NULL) {
+ Shortread_free(&queryseq2);
}
#endif
+ } else if (filter_if_both_p == true &&
+ Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
+ Shortread_free(&queryseq1);
+ if (queryseq2 != NULL) {
+ Shortread_free(&queryseq2);
+ }
+
+ } else if (filter_if_both_p == false &&
+ (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
+ Shortread_free(&queryseq1);
+ if (queryseq2 != NULL) {
+ Shortread_free(&queryseq2);
+ }
+
} else {
- /* FASTA input, text */
- while (nread < this->nspaces &&
-#if 0
- nchars < this->maxchars &&
+ this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
+ }
+ this->inputid++;
+ }
+
+ this->nleft = nread;
+ this->ptr = 0;
+
+ /* Need to set this to the FILE * end (EOF at end of file), and not the filecontents end (always '\0') */
+ this->nextchar = nextchar_end;
+
+#ifdef USE_MPI
+ debug(printf("Worker %d: ",this->myid));
#endif
- (queryseq1 = Shortread_read_fasta_shortreads(&this->nextchar,&queryseq2,&this->input,&this->input2,
- &this->files,&this->nfiles,skipp = (this->inputid % this->part_interval != this->part_modulus),
- this->barcode_length,this->invert_first_p,this->invert_second_p)) != NULL) {
- if (skipp) {
+ debug(printf("this->nextchar (nextchar_end) is %c (%d)\n",this->nextchar,this->nextchar));
+
+ return nread;
+}
+
+#elif defined(GSNAP)
+
+/* Returns number of requests read */
+static unsigned int
+fill_buffer (T this) {
+ unsigned int nread = 0;
+ Shortread_T queryseq1, queryseq2;
+ bool skipp;
+ int nchars1, nchars2; /* Returned only because MPI master needs it. Doesn't need to be saved as a field in Inbuffer_T. */
+
+ while (nread < this->nspaces &&
+ (queryseq1 = Shortread_read(&this->nextchar,&nchars1,&nchars2,&queryseq2,
+ &this->input,&this->input2,
+#ifdef HAVE_ZLIB
+ &this->gzipped,&this->gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ &this->bzipped,&this->bzipped2,
+#endif
+ &this->files,&this->nfiles,skipp = (this->inputid % part_interval != part_modulus))) != NULL) {
+ if (skipp) {
#if 0
- /* Shortread procedures won't allocate in this situation */
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-#endif
-
- } else if (this->filter_if_both_p == true &&
- Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else if (this->filter_if_both_p == false &&
- (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
- Shortread_free(&queryseq1);
- if (queryseq2 != NULL) {
- Shortread_free(&queryseq2);
- }
-
- } else {
- debug(printf("inbuffer creating request %d\n",this->requestid));
- this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
- nchars += Shortread_fulllength(queryseq1);
- if (queryseq2 != NULL) {
- nchars += Shortread_fulllength(queryseq2);
- }
- }
- this->inputid++;
+ /* Shortread procedures won't allocate in this situation */
+ Shortread_free(&queryseq1);
+ if (queryseq2 != NULL) {
+ Shortread_free(&queryseq2);
+ }
+#endif
+
+ } else if (filter_if_both_p == true &&
+ Shortread_filterp(queryseq1) == true && (queryseq2 == NULL || Shortread_filterp(queryseq2) == true)) {
+ Shortread_free(&queryseq1);
+ if (queryseq2 != NULL) {
+ Shortread_free(&queryseq2);
+ }
+
+ } else if (filter_if_both_p == false &&
+ (Shortread_filterp(queryseq1) == true || (queryseq2 != NULL && Shortread_filterp(queryseq2) == true))) {
+ Shortread_free(&queryseq1);
+ if (queryseq2 != NULL) {
+ Shortread_free(&queryseq2);
}
+
+ } else {
+ this->buffer[nread++] = Request_new(this->requestid++,queryseq1,queryseq2);
}
+ this->inputid++;
}
this->nleft = nread;
@@ -566,25 +759,27 @@ fill_buffer (T this) {
#else
+/* GMAP version */
/* Returns number of requests read */
static unsigned int
fill_buffer (T this) {
unsigned int nread = 0;
+#if 0
unsigned int nchars = 0U;
+#endif
Sequence_T queryseq;
while (nread < this->nspaces &&
-#if 0
- nchars < this->maxchars &&
-#endif
(queryseq = Sequence_read_multifile(&this->nextchar,&this->input,
- &this->files,&this->nfiles,this->maponlyp)) != NULL) {
- if (this->inputid % this->part_interval != this->part_modulus) {
+ &this->files,&this->nfiles)) != NULL) {
+ if (this->inputid % part_interval != part_modulus) {
Sequence_free(&queryseq);
} else {
debug(printf("inbuffer creating request %d\n",this->requestid));
this->buffer[nread++] = Request_new(this->requestid++,queryseq);
+#if 0
nchars += Sequence_fulllength(queryseq);
+#endif
}
this->inputid++;
}
@@ -598,6 +793,7 @@ fill_buffer (T this) {
#endif
+#ifndef USE_MPI
/* No need to lock, since only main thread calls */
/* Returns nread to give to Outbuffer_new */
unsigned int
@@ -610,20 +806,20 @@ Inbuffer_fill_init (T this) {
return nread;
}
+#endif
-
Request_T
#ifdef GSNAP
Inbuffer_get_request (T this)
#else
-Inbuffer_get_request (Sequence_T *usersegment, T this, bool user_pairalign_p)
+Inbuffer_get_request (Sequence_T *pairalign_segment, T this)
#endif
{
Request_T request;
unsigned int nread;
-#ifdef HAVE_PTHREAD
+#if defined(HAVE_PTHREAD)
pthread_mutex_lock(&this->lock);
#endif
@@ -631,20 +827,43 @@ Inbuffer_get_request (Sequence_T *usersegment, T this, bool user_pairalign_p)
request = this->buffer[this->ptr++];
this->nleft -= 1;
+ } else if (this->nextchar == EOF) {
+ /* ? Causes stall at end */
+ /* Already know it is pointless to fill buffer */
+ Outbuffer_add_nread(this->outbuffer,/*nread*/0);
+ request = NULL;
+
} else {
- debug(printf("inbuffer filling\n"));
+#ifdef USE_MPI
+ debug(printf("Worker %d: ",this->myid));
+#endif
+ debug(printf("inbuffer filling with nextchar %c (%d)\n",this->nextchar,this->nextchar));
+
#ifndef GSNAP
if (user_pairalign_p == true) {
- /* assert(nspaces == 1) */
- if (*usersegment != NULL) {
- Sequence_free(&(*usersegment));
+ /* assert(this->nspaces == 1) */
+ if (this->pairalign_segment != NULL) {
+ Sequence_free(&this->pairalign_segment);
}
- *usersegment = Sequence_read_unlimited(&this->nextchar,stdin);
+ this->pairalign_segment = Sequence_read_unlimited(&this->nextchar,stdin);
debug(printf(" but first reading usersegment, got nextchar %c\n",this->nextchar));
}
#endif
+
+#ifdef USE_MPI
+ if (this->myid == 0) {
+ nread = fill_buffer_master(this);
+ } else {
+ nread = fill_buffer_slave(this);
+ }
+#else
nread = fill_buffer(this);
+#endif
+
Outbuffer_add_nread(this->outbuffer,nread);
+#ifdef USE_MPI
+ debug(printf("Worker %d: ",this->myid));
+#endif
debug(printf("inbuffer read %d sequences\n",nread));
if (nread == 0) {
@@ -656,7 +875,11 @@ Inbuffer_get_request (Sequence_T *usersegment, T this, bool user_pairalign_p)
}
}
-#ifdef HAVE_PTHREAD
+#ifndef GSNAP
+ *pairalign_segment = this->pairalign_segment;
+#endif
+
+#if defined(HAVE_PTHREAD)
pthread_mutex_unlock(&this->lock);
#endif
@@ -664,14 +887,14 @@ Inbuffer_get_request (Sequence_T *usersegment, T this, bool user_pairalign_p)
}
-
-/* Same as Inbuffer_get_request, but leaves sequence in buffer */
+#ifndef GSNAP
+/* Same as Inbuffer_get_request, but leaves sequence in buffer. Used by GMAP for selfalign feature. */
Request_T
Inbuffer_first_request (T this) {
Request_T request;
unsigned int nread;
-#ifdef HAVE_PTHREAD
+#if defined(HAVE_PTHREAD)
pthread_mutex_lock(&this->lock);
#endif
@@ -694,10 +917,12 @@ Inbuffer_first_request (T this) {
}
}
-#ifdef HAVE_PTHREAD
+#if defined(HAVE_PTHREAD)
pthread_mutex_unlock(&this->lock);
#endif
return request;
}
+#endif
+
diff --git a/src/inbuffer.h b/src/inbuffer.h
index 1908dbf..ab1808a 100644
--- a/src/inbuffer.h
+++ b/src/inbuffer.h
@@ -1,12 +1,27 @@
-/* $Id: inbuffer.h 83593 2013-01-16 22:59:40Z twu $ */
+/* $Id: inbuffer.h 159549 2015-02-25 22:23:14Z twu $ */
#ifndef INBUFFER_INCLUDED
#define INBUFFER_INCLUDED
+#ifdef HAVE_CONFIG_H
+#include <config.h> /* For HAVE_ZLIB, HAVE_BZLIB, USE_MPI_FILE_INPUT */
+#endif
+
+#ifdef USE_MPI
+#include <mpi.h>
+#include "mpidebug.h"
+#include "master.h"
+#endif
+
#include <stdio.h>
#include "bool.h"
#include "outbuffer.h"
-#include "sequence.h"
#include "request.h"
+#ifdef GSNAP
+#include "shortread.h"
+#else
+#include "sequence.h"
+#endif
+
#ifdef HAVE_ZLIB
#include <zlib.h>
#endif
@@ -19,35 +34,50 @@
#define T Inbuffer_T
typedef struct T *T;
+extern void
+Inbuffer_setup (bool filter_if_both_p_in,
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ MPI_Comm workers_comm_in,
+#endif
+#ifndef GSNAP
+ bool user_pairalign_p_in, Sequence_T global_usersegment_in,
+#endif
+ int part_modulus_in, int part_interval_in);
+
#ifndef GSNAP
extern T
Inbuffer_cmdline (char *contents, int length);
#endif
extern T
-Inbuffer_new (int nextchar, FILE *input,
+Inbuffer_new (int nextchar,
+#ifdef USE_MPI
+ int myid,
+#endif
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ MPI_File input,
+#else
+ FILE *input,
+#endif
#ifdef GSNAP
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+ MPI_File input2,
+#else
FILE *input2,
+#endif
#ifdef HAVE_ZLIB
gzFile gzipped, gzFile gzipped2,
#endif
#ifdef HAVE_BZLIB
Bzip2_T bzipped, Bzip2_T bzipped2,
#endif
-#ifdef HAVE_GOBY
- Gobyreader_T gobyreader,
#endif
+ char **files, int nfiles, unsigned int nspaces);
+
+#ifdef USE_MPI
+extern void
+Inbuffer_set_master (T this, Master_T master);
#endif
- char **files, int nfiles,
-#ifdef GSNAP
- bool fastq_format_p, bool creads_format_p,
- int barcode_length, bool invert_first_p, bool invert_second_p,
- bool chop_primers_p,
-#else
- bool maponlyp,
-#endif
- unsigned int nspaces, unsigned int maxchars, int part_interval, int part_modulus,
- bool filter_if_both_p);
extern void
Inbuffer_set_outbuffer (T this, Outbuffer_T outbuffer);
@@ -55,19 +85,44 @@ Inbuffer_set_outbuffer (T this, Outbuffer_T outbuffer);
extern void
Inbuffer_free (T *old);
+#ifdef GSNAP
+extern Shortread_T
+Inbuffer_read (Shortread_T *queryseq2, T this, bool skipp);
+#else
+Sequence_T
+Inbuffer_read (Sequence_T *pairalign_segment, T this, bool skipp);
+#endif
+
+#ifndef USE_MPI
extern unsigned int
Inbuffer_fill_init (T this);
+#endif
+
extern Request_T
#ifdef GSNAP
Inbuffer_get_request (T this);
#else
-Inbuffer_get_request (Sequence_T *usersegment, T this, bool user_pairalign_p);
+Inbuffer_get_request (Sequence_T *pairalign_segment, T this);
#endif
-
+#ifndef GSNAP
extern Request_T
Inbuffer_first_request (T this);
+#endif
+
+#ifdef USE_MPI
+extern int
+Inbuffer_master_process (int n_worker_ranks, int nextchar, int nchars1, int nchars2,
+ FILE *input, FILE *input2,
+#ifdef HAVE_ZLIB
+ gzFile gzipped, gzFile gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ Bzip2_T bzipped, Bzip2_T bzipped2,
+#endif
+ char **files, int nfiles, int nspaces, int part_modulus, int part_interval);
+#endif
#undef T
#endif
diff --git a/src/indel.c b/src/indel.c
index f64254c..7127a47 100644
--- a/src/indel.c
+++ b/src/indel.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indel.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: indel.c 167164 2015-06-09 20:54:17Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -31,6 +31,312 @@ Indel_setup (int min_indel_end_matches_in, int indel_penalty_middle_in) {
}
+/* Called only by sarray-read.c, where plusp is always true */
+/* indels is positive here */
+/* Chooses the query position of a middle insertion of `indels` bases.
+ *
+ * Mismatch positions are collected twice: scanning from the left with the
+ * genome anchored at `left`, and scanning from the right with the genome
+ * anchored at `left-indels`.  Every candidate break point is then scored as
+ * (mismatches kept on the left side) + (mismatches kept on the right side),
+ * and the candidate with the lowest score is chosen.  A candidate is
+ * accepted only if
+ *   indel_pos >= min_indel_end_matches  and
+ *   indel_pos + indels <= querylength - min_indel_end_matches.
+ *
+ * Outputs:
+ *   *best_nmismatches_i  number of mismatches counted on the left side
+ *   *best_nmismatches_j  number of mismatches counted on the right side
+ *   (both are written even when the function returns -1)
+ *
+ * Returns the chosen indel position, or -1 when the best score exceeds
+ * max_mismatches_allowed (which includes the case where no candidate was
+ * accepted and the score kept its initial value of 2*querylength). */
+int
+Indel_resolve_middle_insertion (int *best_nmismatches_i, int *best_nmismatches_j,
+				Univcoord_T left, int indels, Compress_T query_compress,
+				int querystart, int queryend, int querylength,
+				int max_mismatches_allowed,
+				bool plusp, int genestrand, bool first_read_p) {
+  int best_indel_pos = -1, indel_pos;
+#ifdef DEBUG2
+  int i;
+  char *gbuffer;
+#endif
+  int nmismatches_left, nmismatches_right;
+  int best_sum, sum, nmismatches_lefti, nmismatches_righti, lefti, righti;
+
+  /* NOTE(review): with alloca the buffers hold querylength ints; confirm that
+     Genome_mismatches_left/right never store more entries than that (the
+     debug loops below read index nmismatches_*, one past the count). */
+#ifdef HAVE_ALLOCA
+  int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
+  int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
+#else
+  int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH];
+
+  /* Fixed-size buffers: never ask for more mismatches than they can hold */
+  if (max_mismatches_allowed > MAX_READLENGTH) {
+    max_mismatches_allowed = MAX_READLENGTH;
+  }
+#endif
+
+
+  /* query has insertion.  Get |indels| less from genome; trim from left. */
+  /* left = ptr->diagonal - querylength; */
+
+  assert(indels > 0);
+  debug2(gbuffer = (char *) CALLOC(querylength-indels+1,sizeof(char)));
+  debug2(Genome_fill_buffer_blocks(left+indels,querylength-indels,gbuffer));
+  debug2(printf("solve_middle_indel, plus, insertion: Getting genome at diagonal - querylength %d + indels %d = %llu\n",
+		querylength,indels,(unsigned long long) left+indels));
+  debug2(printf("g1: %s\n",gbuffer));
+  debug2(printf("g2: %s\n",&(gbuffer[indels])));
+  /* Release the debug-only buffer, as Indel_resolve_middle_deletion does */
+  debug2(FREE(gbuffer));
+
+  /* No need to check chromosome bounds */
+  debug2(printf("max_mismatches_allowed is %d\n",max_mismatches_allowed));
+  nmismatches_left = Genome_mismatches_left(mismatch_positions_left,max_mismatches_allowed,
+					    query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
+					    plusp,genestrand,first_read_p);
+
+  debug2(
+	 printf("%d mismatches on left at:",nmismatches_left);
+	 for (i = 0; i <= nmismatches_left; i++) {
+	   printf(" %d",mismatch_positions_left[i]);
+	 }
+	 printf("\n");
+	 );
+
+
+  /* No need to check chromosome bounds */
+  debug2(printf("max_mismatches_allowed is %d\n",max_mismatches_allowed));
+  nmismatches_right = Genome_mismatches_right(mismatch_positions_right,max_mismatches_allowed,
+					      query_compress,left-indels,/*pos5*/querystart,/*pos3*/queryend,
+					      plusp,genestrand,first_read_p);
+
+  debug2(
+	 printf("%d mismatches on right at:",nmismatches_right);
+	 for (i = 0; i <= nmismatches_right; i++) {
+	   printf(" %d",mismatch_positions_right[i]);
+	 }
+	 printf("\n");
+	 );
+
+  /* Larger than any achievable sum, so the first accepted candidate wins */
+  best_sum = querylength + querylength;
+
+  /* Modeled after end D to get lowest possible coordinate */
+  /* Pass 1: candidates lie just past each right-side mismatch.  Using <=
+     lets a later candidate with an equal sum replace an earlier one. */
+  righti = 0;
+  lefti = nmismatches_left - 1;
+  nmismatches_righti = /*righti*/ 0;
+  nmismatches_lefti = /*lefti+1*/ nmismatches_left;
+
+  while (righti < nmismatches_right) {
+    /* Drop left mismatches that fall beyond this candidate break point */
+    while (lefti >= 0 && mismatch_positions_left[lefti] > mismatch_positions_right[righti] - indels) {
+      lefti--;
+    }
+    sum = righti + lefti + 1;
+    debug2(printf("  (Case D) sum %d=%d+%d at indel_pos %d.",
+		  sum,righti,lefti+1,mismatch_positions_right[righti]-indels+1));
+    if (sum <= best_sum) {
+      indel_pos = mismatch_positions_right[righti] - indels + 1;
+      if (indel_pos >= min_indel_end_matches && indel_pos + indels <= querylength - min_indel_end_matches) {
+	best_indel_pos = indel_pos;
+	nmismatches_righti = righti;
+	nmismatches_lefti = lefti + 1;
+	debug2(printf("**"));
+	best_sum = sum;
+      }
+    }
+    righti++;
+  }
+  debug2(printf("\n"));
+
+
+  /* Try from other side to see if we missed anything */
+  /* Pass 2: candidates lie at each left-side mismatch.  A strictly better
+     sum always wins; an equal sum wins only at a lower indel_pos. */
+  lefti = 0;
+  righti = nmismatches_right - 1;
+
+  while (lefti < nmismatches_left) {
+    /* Drop right mismatches that fall before the end of the inserted bases */
+    while (righti >= 0 && mismatch_positions_right[righti] < mismatch_positions_left[lefti] + indels) {
+      righti--;
+    }
+    sum = lefti + righti + 1;
+    debug2(printf("  (Case D2) sum %d=%d+%d at indel_pos %d.",
+		  sum,lefti,righti+1,mismatch_positions_left[lefti]));
+    if (sum < best_sum) {
+      indel_pos = mismatch_positions_left[lefti];
+      if (indel_pos >= min_indel_end_matches && indel_pos + indels <= querylength - min_indel_end_matches) {
+	best_indel_pos = indel_pos;
+	nmismatches_righti = righti + 1;
+	nmismatches_lefti = lefti;
+	debug2(printf("**"));
+	best_sum = sum;
+      }
+    } else if (sum == best_sum) {
+      indel_pos = mismatch_positions_left[lefti];
+      if (indel_pos < best_indel_pos) {
+	if (indel_pos >= min_indel_end_matches && indel_pos + indels <= querylength - min_indel_end_matches) {
+	  best_indel_pos = indel_pos;
+	  nmismatches_righti = righti + 1;
+	  nmismatches_lefti = lefti;
+	  debug2(printf("**"));
+	  /* best_sum = sum; */
+	}
+      }
+    }
+    lefti++;
+  }
+  debug2(printf("\n"));
+
+  *best_nmismatches_i = nmismatches_lefti;
+  *best_nmismatches_j = nmismatches_righti;
+
+  if (best_sum > max_mismatches_allowed) {
+    debug2(printf("Returning -1\n"));
+    return -1;
+#if 0
+  } else if (plusp == true) {
+    return best_indel_pos;
+  } else {
+    return querylength - best_indel_pos - indels;
+#else
+  } else {
+    /* plusp is always true for the only caller, so no strand flip is needed */
+    debug2(printf("Returning %d\n",best_indel_pos));
+    return best_indel_pos;
+#endif
+  }
+}
+
+
+/* Called only by sarray-read.c, where plusp is always true */
+/* indels is negative here */
+/* Chooses the query position of a middle deletion of |indels| bases.
+ * Mismatch positions are collected scanning from the left with the genome
+ * anchored at `left`, and scanning from the right with the genome anchored
+ * at `left-indels` (i.e. left + |indels|, since indels < 0).  Each candidate
+ * break point is scored as (left-side mismatches) + (right-side mismatches)
+ * and must satisfy
+ *   min_indel_end_matches <= indel_pos <= querylength - min_indel_end_matches.
+ * Writes the per-side counts to *best_nmismatches_i (left) and
+ * *best_nmismatches_j (right), even when returning -1.
+ * Returns the chosen position, or -1 if the best score exceeds
+ * max_mismatches_allowed. */
+int
+Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
+ Univcoord_T left, int indels, Compress_T query_compress,
+ int querystart, int queryend, int querylength,
+ int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p) {
+ int best_indel_pos = -1, indel_pos;
+#ifdef DEBUG2
+ int i;
+ char *gbuffer;
+#endif
+ int nmismatches_left, nmismatches_right, nmismatches_lefti, nmismatches_righti;
+ int best_sum, sum, lefti, righti;
+
+#ifdef HAVE_ALLOCA
+ int *mismatch_positions_left = (int *) ALLOCA(querylength * sizeof(int));
+ int *mismatch_positions_right = (int *) ALLOCA(querylength * sizeof(int));
+#else
+ int mismatch_positions_left[MAX_READLENGTH], mismatch_positions_right[MAX_READLENGTH];
+
+ /* Fixed-size buffers: never ask for more mismatches than they can hold */
+ if (max_mismatches_allowed > MAX_READLENGTH) {
+ max_mismatches_allowed = MAX_READLENGTH;
+ }
+#endif
+
+
+ /* query has deletion. Get |indels| more from genome; add to right. */
+ /* left = ptr->diagonal - querylength; */
+
+ assert(indels < 0);
+ /* Debug-only dump of the genomic segment; the buffer is freed right after printing */
+ debug2(gbuffer = (char *) CALLOC(querylength-indels+1,sizeof(char)));
+ debug2(Genome_fill_buffer_blocks(left,querylength-indels,gbuffer));
+ debug2(printf("solve_middle_indel, plus, deletion (indels %d), max_mismatches_allowed %d: Getting genome at diagonal - querylength %d = %llu\n",
+ indels,max_mismatches_allowed,querylength,(unsigned long long) left));
+ debug2(printf("g1: %s\n",gbuffer));
+ debug2(printf("g2: %s\n",&(gbuffer[-indels])));
+ debug2(FREE(gbuffer));
+
+ /* No need to check chromosome bounds */
+ nmismatches_left = Genome_mismatches_left(mismatch_positions_left,max_mismatches_allowed,
+ query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
+ plusp,genestrand,first_read_p);
+
+ debug2(
+ printf("%d mismatches on left at:",nmismatches_left);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions_left[i]);
+ }
+ printf("\n");
+ );
+
+ /* No need to check chromosome bounds */
+ nmismatches_right = Genome_mismatches_right(mismatch_positions_right,max_mismatches_allowed,
+ query_compress,left-indels,/*pos5*/querystart,/*pos3*/queryend,
+ plusp,genestrand,first_read_p);
+
+ debug2(
+ printf("%d mismatches on right at:",nmismatches_right);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions_right[i]);
+ }
+ printf("\n");
+ );
+
+ /* Larger than any achievable sum, so the first accepted candidate wins */
+ best_sum = querylength + querylength;
+
+ /* Modeled after end C to get lowest possible coordinate */
+ /* Pass 1: candidates lie just past each right-side mismatch. The <= test
+ lets a later candidate with an equal sum replace an earlier one. */
+ righti = 0;
+ lefti = nmismatches_left - 1;
+ nmismatches_righti = /*righti*/ 0;
+ nmismatches_lefti = /*lefti+1*/ nmismatches_left;
+
+ while (righti < nmismatches_right) {
+ /* Drop left mismatches positioned beyond this right-side mismatch */
+ while (lefti >= 0 && mismatch_positions_left[lefti] > mismatch_positions_right[righti]) {
+ lefti--;
+ }
+ sum = righti + lefti + 1;
+ debug2(printf(" (Case C1) sum %d=%d+%d at indel_pos %d.",
+ sum,righti,lefti+1,mismatch_positions_right[righti]+1));
+ if (sum <= best_sum) {
+ indel_pos = mismatch_positions_right[righti] + 1;
+ if (indel_pos >= min_indel_end_matches && indel_pos <= querylength - min_indel_end_matches) {
+ best_indel_pos = indel_pos;
+ nmismatches_righti = righti;
+ nmismatches_lefti = lefti + 1;
+ debug2(printf("**"));
+ best_sum = sum;
+ }
+ }
+ righti++;
+ }
+ debug2(printf("\n"));
+
+ /* Try from other side to see if we missed anything */
+ /* Pass 2: candidates lie at each left-side mismatch. A strictly better sum
+ always wins; an equal sum wins only at a lower indel_pos. */
+ lefti = 0;
+ righti = nmismatches_right - 1;
+
+ while (lefti < nmismatches_left) {
+ /* Drop right mismatches positioned before this left-side mismatch */
+ while (righti >= 0 && mismatch_positions_right[righti] < mismatch_positions_left[lefti]) {
+ righti--;
+ }
+ sum = lefti + righti + 1;
+ debug2(printf(" (Case C2) sum %d=%d+%d at indel_pos %d.",
+ sum,lefti,righti+1,mismatch_positions_left[lefti]));
+ if (sum < best_sum) {
+ indel_pos = mismatch_positions_left[lefti];
+ if (indel_pos >= min_indel_end_matches && indel_pos <= querylength - min_indel_end_matches) {
+ best_indel_pos = indel_pos;
+ nmismatches_lefti = lefti;
+ nmismatches_righti = righti + 1;
+ debug2(printf("**"));
+ best_sum = sum;
+ }
+ } else if (sum == best_sum) {
+ indel_pos = mismatch_positions_left[lefti];
+ if (indel_pos < best_indel_pos) {
+ if (indel_pos >= min_indel_end_matches && indel_pos <= querylength - min_indel_end_matches) {
+ best_indel_pos = indel_pos;
+ nmismatches_lefti = lefti;
+ nmismatches_righti = righti + 1;
+ debug2(printf("**"));
+ /* best_sum = sum; */
+ }
+ }
+ }
+ lefti++;
+ }
+ debug2(printf("\n"));
+
+ /* Outputs are stored unconditionally, including on the -1 path below */
+ *best_nmismatches_i = nmismatches_lefti;
+ *best_nmismatches_j = nmismatches_righti;
+
+ if (best_sum > max_mismatches_allowed) {
+ debug2(printf("Returning -1\n"));
+ return -1;
+#if 0
+ } else if (plusp == true) {
+ return best_indel_pos;
+ } else {
+ return querylength - best_indel_pos;
+#else
+ } else {
+ /* The disabled branch above flipped the coordinate for the minus strand */
+ debug2(printf("Returning %d\n",best_indel_pos));
+ return best_indel_pos;
+#endif
+ }
+}
+
+
/* indels is positive here */
List_T
Indel_solve_middle_insertion (bool *foundp, int *found_score, int *nhits, List_T hits,
@@ -41,7 +347,7 @@ Indel_solve_middle_insertion (bool *foundp, int *found_score, int *nhits, List_T
bool plusp, int genestrand, bool first_read_p, bool sarrayp) {
#ifdef DEBUG2
int i;
- char gbuffer[MAX_READLENGTH+1];
+ char *gbuffer;
#endif
Stage3end_T hit;
int best_indel_pos, query_indel_pos, indel_pos;
@@ -63,6 +369,7 @@ Indel_solve_middle_insertion (bool *foundp, int *found_score, int *nhits, List_T
/* left = ptr->diagonal - querylength; */
assert(indels > 0);
+ debug2(gbuffer = (char *) CALLOC(querylength-indels+1,sizeof(char)));
debug2(Genome_fill_buffer_blocks(left+indels,querylength-indels,gbuffer));
debug2(printf("solve_middle_indel, plus, insertion: Getting genome at diagonal - querylength %d + indels %d = %llu\n",
querylength,indels,(unsigned long long) left+indels));
@@ -109,7 +416,7 @@ Indel_solve_middle_insertion (bool *foundp, int *found_score, int *nhits, List_T
lefti--;
}
sum = righti + lefti + 1;
- debug2(printf("(Case D) sum %d=%d+%d at indel_pos %d. ",
+ debug2(printf(" (Case D) sum %d=%d+%d at indel_pos %d.",
sum,righti,lefti+1,mismatch_positions_right[righti]-indels+1));
if (sum <= best_sum) {
indel_pos = mismatch_positions_right[righti] - indels + 1;
@@ -135,7 +442,7 @@ Indel_solve_middle_insertion (bool *foundp, int *found_score, int *nhits, List_T
righti--;
}
sum = lefti + righti + 1;
- debug2(printf("(Case D2) sum %d=%d+%d at indel_pos %d. ",
+ debug2(printf(" (Case D2) sum %d=%d+%d at indel_pos %d.",
sum,lefti,righti+1,mismatch_positions_left[lefti]));
if (sum < best_sum) {
indel_pos = mismatch_positions_left[lefti];
@@ -198,8 +505,8 @@ List_T
Indel_solve_middle_deletion (bool *foundp, int *found_score, int *nhits, List_T hits,
Univcoord_T left, Chrnum_T chrnum, Univcoord_T chroffset,
Univcoord_T chrhigh, Chrpos_T chrlength,
- int indels, Compress_T query_compress,
- int querylength, int max_mismatches_allowed,
+ int indels, Compress_T query_compress, int querylength,
+ int max_mismatches_allowed,
bool plusp, int genestrand, bool first_read_p, bool sarrayp) {
#ifdef DEBUG2
int i;
@@ -272,7 +579,7 @@ Indel_solve_middle_deletion (bool *foundp, int *found_score, int *nhits, List_T
lefti--;
}
sum = righti + lefti + 1;
- debug2(printf("(Case C1) sum %d=%d+%d at indel_pos %d. ",
+ debug2(printf(" (Case C1) sum %d=%d+%d at indel_pos %d.",
sum,righti,lefti+1,mismatch_positions_right[righti]+1));
if (sum <= best_sum) {
indel_pos = mismatch_positions_right[righti] + 1;
@@ -297,7 +604,7 @@ Indel_solve_middle_deletion (bool *foundp, int *found_score, int *nhits, List_T
righti--;
}
sum = lefti + righti + 1;
- debug2(printf("(Case C2) sum %d=%d+%d at indel_pos %d. ",
+ debug2(printf(" (Case C2) sum %d=%d+%d at indel_pos %d.",
sum,lefti,righti+1,mismatch_positions_left[lefti]));
if (sum < best_sum) {
indel_pos = mismatch_positions_left[lefti];
diff --git a/src/indel.h b/src/indel.h
index c4e612e..18b0213 100644
--- a/src/indel.h
+++ b/src/indel.h
@@ -1,15 +1,32 @@
-/* $Id: indel.h 133760 2014-04-20 05:16:56Z twu $ */
+/* $Id: indel.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef INDEL_INCLUDED
#define INDEL_INCLUDED
+
#include "bool.h"
#include "list.h"
#include "chrnum.h"
#include "genomicpos.h"
#include "compress.h"
+#include "genome.h"
extern void
Indel_setup (int min_indel_end_matches_in, int indel_penalty_middle_in);
+extern int
+Indel_resolve_middle_insertion (int *best_nmismatches_i, int *best_nmismatches_j,
+ Univcoord_T left, int indels, Compress_T query_compress,
+ int querystart, int queryend, int querylength,
+ int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p);
+
+extern int
+Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
+ Univcoord_T left, int indels, Compress_T query_compress,
+ int querystart, int queryend, int querylength,
+ int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p);
+
+
extern List_T
Indel_solve_middle_insertion (bool *foundp, int *found_score, int *nhits, List_T hits,
Univcoord_T left, Chrnum_T chrnum, Univcoord_T chroffset,
@@ -22,8 +39,8 @@ extern List_T
Indel_solve_middle_deletion (bool *foundp, int *found_score, int *nhits, List_T hits,
Univcoord_T left, Chrnum_T chrnum, Univcoord_T chroffset,
Univcoord_T chrhigh, Chrpos_T chrlength,
- int indels, Compress_T query_compress,
- int querylength, int max_mismatches_allowed,
+ int indels, Compress_T query_compress, int querylength,
+ int max_mismatches_allowed,
bool plusp, int genestrand, bool first_read_p, bool sarrayp);
#endif
diff --git a/src/indexdb-write.c b/src/indexdb-write.c
index 718c6e3..a96ee88 100644
--- a/src/indexdb-write.c
+++ b/src/indexdb-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb-write.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: indexdb-write.c 165969 2015-05-20 00:18:07Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -24,6 +24,7 @@ static char rcsid[] = "$Id: indexdb-write.c 153955 2014-11-24 17:54:45Z twu $";
#include <string.h> /* For memset */
#include <ctype.h> /* For toupper */
#include <sys/mman.h> /* For munmap */
+
#ifdef HAVE_UNISTD_H
#include <unistd.h> /* For lseek and close */
#endif
@@ -116,6 +117,7 @@ power (int base, int exponent) {
static void
check_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
Oligospace_T oligospace, Blocksize_T blocksize) {
+ int shmid;
UINT4 *offsetsmeta, *offsetsstrm, *info;
int offsetsmeta_fd, offsetsstrm_fd;
size_t offsetsmeta_len, offsetsstrm_len;
@@ -130,8 +132,8 @@ check_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
offsetsmeta = (UINT4 *) Access_mmap(&offsetsmeta_fd,&offsetsmeta_len,offsetsmetafile,sizeof(UINT4),/*randomp*/false);
offsetsstrm = (UINT4 *) Access_mmap(&offsetsstrm_fd,&offsetsstrm_len,offsetsstrmfile,sizeof(UINT4),/*randomp*/false);
#else
- offsetsmeta = (UINT4 *) Access_allocated(&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4));
- offsetsstrm = (UINT4 *) Access_allocated(&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4));
+ offsetsmeta = (UINT4 *) Access_allocate(&shmid,&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4),/*sharedp*/false);
+ offsetsstrm = (UINT4 *) Access_allocate(&shmid,&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4),/*sharedp*/false);
#endif
for (oligo = 0; oligo < oligospace; oligo += blocksize) {
@@ -165,6 +167,7 @@ check_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
static void
check_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile, Positionsptr_T *offsets,
Oligospace_T oligospace, Blocksize_T blocksize) {
+ int shmid;
UINT4 *offsetsmeta;
UINT4 *offsetsstrm;
Positionsptr_T offsets_decoded[MAX_BITPACK_BLOCKSIZE+1];
@@ -180,8 +183,8 @@ check_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile, Positi
offsetsmeta = (UINT4 *) Access_mmap(&offsetsmeta_fd,&offsetsmeta_len,offsetsmetafile,sizeof(UINT4),/*randomp*/false);
offsetsstrm = (UINT4 *) Access_mmap(&offsetsstrm_fd,&offsetsstrm_len,offsetsstrmfile,sizeof(UINT4),/*randomp*/false);
#else
- offsetsmeta = (UINT4 *) Access_allocated(&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4));
- offsetsstrm = (UINT4 *) Access_allocated(&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4));
+ offsetsmeta = (UINT4 *) Access_allocate(&shmid,&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4),/*sharedp*/false);
+ offsetsstrm = (UINT4 *) Access_allocate(&shmid,&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4),/*sharedp*/false);
#endif
for (oligoi = 0UL; oligoi < oligospace; oligoi += blocksize) {
@@ -220,6 +223,7 @@ check_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile, Positi
static void
check_offsets_from_bitpack_huge (char *offsetspagesfile, char *offsetsmetafile, char *offsetsstrmfile,
Hugepositionsptr_T *offsets, Oligospace_T oligospace, Blocksize_T blocksize) {
+ int shmid;
UINT4 *offsetspages;
UINT4 *offsetsmeta;
UINT4 *offsetsstrm;
@@ -230,13 +234,13 @@ check_offsets_from_bitpack_huge (char *offsetspagesfile, char *offsetsmetafile,
double seconds;
- offsetspages = (UINT4 *) Access_allocated(&offsetspages_len,&seconds,offsetspagesfile,sizeof(UINT4));
+ offsetspages = (UINT4 *) Access_allocate(&shmid,&offsetspages_len,&seconds,offsetspagesfile,sizeof(UINT4),/*sharedp*/false);
#ifdef HAVE_MMAP
offsetsmeta = (UINT4 *) Access_mmap(&offsetsmeta_fd,&offsetsmeta_len,offsetsmetafile,sizeof(UINT4),/*randomp*/false);
offsetsstrm = (UINT4 *) Access_mmap(&offsetsstrm_fd,&offsetsstrm_len,offsetsstrmfile,sizeof(UINT4),/*randomp*/false);
#else
- offsetsmeta = (UINT4 *) Access_allocated(&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4));
- offsetsstrm = (UINT4 *) Access_allocated(&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4));
+ offsetsmeta = (UINT4 *) Access_allocate(&shmid,&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4),/*sharedp*/false);
+ offsetsstrm = (UINT4 *) Access_allocate(&shmid,&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4),/*sharedp*/false);
#endif
for (oligoi = 0UL; oligoi < oligospace; oligoi += blocksize) {
@@ -1961,9 +1965,10 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
#else
int index1part,
#endif
- int index1interval, bool genome_lc_p, bool writefilep,
+ int index1interval, Univcoord_T genomelength, bool genome_lc_p, bool writefilep,
char *fileroot, bool mask_lowercase_p, int compression_type,
bool coord_values_8p) {
+ int shmid;
FILE *positions_high_fp, *positions_low_fp; /* For building positions in memory */
int positions_high_fd, positions_low_fd; /* For building positions in file */
Positionsptr_T *offsets = NULL, totalcounts, count;
@@ -1986,21 +1991,37 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
if (compression_type == BITPACK64_COMPRESSION) {
offsets = Indexdb_offsets_from_bitpack(pointersfile,offsetsfile,alphabet_size,index1part_aa);
} else {
- offsets = (UINT4 *) Access_allocated(&offsetsstrm_len,&seconds,offsetsfile,sizeof(UINT4));
+ offsets = (UINT4 *) Access_allocate(&shmid,&offsetsstrm_len,&seconds,offsetsfile,sizeof(UINT4),/*sharedp*/false);
}
oligospace = power(alphabet_size,index1part_aa);
#else
if (compression_type == BITPACK64_COMPRESSION) {
offsets = Indexdb_offsets_from_bitpack(pointersfile,offsetsfile,index1part);
} else {
- offsets = (UINT4 *) Access_allocated(&offsetsstrm_len,&seconds,offsetsfile,sizeof(UINT4));
+ offsets = (UINT4 *) Access_allocate(&shmid,&offsetsstrm_len,&seconds,offsetsfile,sizeof(UINT4),/*sharedp*/false);
}
oligospace = power(4,index1part);
#endif
totalcounts = offsets[oligospace];
if (totalcounts == 0) {
- fprintf(stderr,"Something is wrong with the offsets file. Total counts is zero.\n");
- exit(9);
+ if (genomelength > index1part) {
+ fprintf(stderr,"Something is wrong with the offsets file. Total counts is zero.\n");
+ exit(9);
+ } else {
+ FREE(offsets);
+#if 0
+ if ((positions_high_fp = FOPEN_WRITE_BINARY(positionsfile_high)) == NULL) {
+ fprintf(stderr,"Can't open file %s\n",positionsfile_high);
+ exit(9);
+ } else if ((positions_low_fp = FOPEN_WRITE_BINARY(positionsfile_low)) == NULL) {
+ fprintf(stderr,"Can't open file %s\n",positionsfile_low);
+ exit(9);
+ }
+ fclose(positions_high_fp);
+ fclose(positions_low_fp);
+#endif
+ return;
+ }
}
if (writefilep == true) {
@@ -2213,7 +2234,7 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
#else
int index1part,
#endif
- int index1interval, bool genome_lc_p, bool writefilep,
+ int index1interval, Univcoord_T genomelength, bool genome_lc_p, bool writefilep,
char *fileroot, bool mask_lowercase_p, int compression_type,
bool coord_values_8p) {
FILE *positions_high_fp, *positions_low_fp; /* For building positions in memory */
@@ -2242,8 +2263,24 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
#endif
totalcounts = offsets[oligospace];
if (totalcounts == 0) {
- fprintf(stderr,"Something is wrong with the offsets file. Total counts is zero.\n");
- exit(9);
+ if (genomelength > index1part) {
+ fprintf(stderr,"Something is wrong with the offsets file. Total counts is zero.\n");
+ exit(9);
+ } else {
+ FREE(offsets);
+#if 0
+ if ((positions_high_fp = FOPEN_WRITE_BINARY(positionsfile_high)) == NULL) {
+ fprintf(stderr,"Can't open file %s\n",positionsfile_high);
+ exit(9);
+ } else if ((positions_low_fp = FOPEN_WRITE_BINARY(positionsfile_low)) == NULL) {
+ fprintf(stderr,"Can't open file %s\n",positionsfile_low);
+ exit(9);
+ }
+ fclose(positions_high_fp);
+ fclose(positions_low_fp);
+#endif
+ return;
+ }
}
if (writefilep == true) {
diff --git a/src/indexdb-write.h b/src/indexdb-write.h
index a031b3c..40105ee 100644
--- a/src/indexdb-write.h
+++ b/src/indexdb-write.h
@@ -1,4 +1,4 @@
-/* $Id: indexdb-write.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: indexdb-write.h 165969 2015-05-20 00:18:07Z twu $ */
#ifndef INDEXDB_WRITE_INCLUDED
#define INDEXDB_WRITE_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -63,7 +63,7 @@ Indexdb_write_positions (char *positionsfile_high, char *positionsfile_low, char
#else
Width_T index1part,
#endif
- Width_T index1interval, bool genome_lc_p, bool writefilep,
+ Width_T index1interval, Univcoord_T genomelength, bool genome_lc_p, bool writefilep,
char *fileroot, bool mask_lowercase_p, int compression_type,
bool coord_values_8p);
@@ -77,7 +77,7 @@ Indexdb_write_positions_huge (char *positionsfile_high, char *positionsfile_low,
#else
int index1part,
#endif
- int index1interval, bool genome_lc_p, bool writefilep,
+ int index1interval, Univcoord_T genomelength, bool genome_lc_p, bool writefilep,
char *fileroot, bool mask_lowercase_p, int compression_type,
bool coord_values_8p);
#endif
diff --git a/src/indexdb.c b/src/indexdb.c
index 5be1780..fa9232b 100644
--- a/src/indexdb.c
+++ b/src/indexdb.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: indexdb.c 161940 2015-03-25 20:36:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -28,6 +28,7 @@ static char rcsid[] = "$Id: indexdb.c 153955 2014-11-24 17:54:45Z twu $";
#include <string.h> /* For memset */
#include <ctype.h> /* For toupper */
#include <sys/mman.h> /* For munmap */
+
#ifdef HAVE_UNISTD_H
#include <unistd.h> /* For lseek and close */
#endif
@@ -135,13 +136,22 @@ Indexdb_setup (Width_T index1part_in) {
void
Indexdb_free (T *old) {
if (*old) {
- if ((*old)->positions_access == ALLOCATED) {
+ if ((*old)->positions_access == ALLOCATED_PRIVATE) {
#ifdef LARGE_GENOMES
FREE((*old)->positions_high);
FREE((*old)->positions_low);
#else
FREE((*old)->positions);
#endif
+
+ } else if ((*old)->positions_access == ALLOCATED_SHARED) {
+#ifdef LARGE_GENOMES
+ Access_deallocate((*old)->positions_high,(*old)->positions_high_shmid);
+ Access_deallocate((*old)->positions_low,(*old)->positions_low_shmid);
+#else
+ Access_deallocate((*old)->positions,(*old)->positions_shmid);
+#endif
+
#ifdef HAVE_MMAP
} else if ((*old)->positions_access == MMAPPED) {
#ifdef LARGE_GENOMES
@@ -166,8 +176,12 @@ Indexdb_free (T *old) {
#endif
}
- if ((*old)->offsetsstrm_access == ALLOCATED) {
+ if ((*old)->offsetsstrm_access == ALLOCATED_PRIVATE) {
FREE((*old)->offsetsstrm);
+
+ } else if ((*old)->offsetsstrm_access == ALLOCATED_SHARED) {
+ Access_deallocate((*old)->offsetsstrm,(*old)->offsetsstrm_shmid);
+
#ifdef HAVE_MMAP
} else if ((*old)->offsetsstrm_access == MMAPPED) {
munmap((void *) (*old)->offsetsstrm,(*old)->offsetsstrm_len);
@@ -175,9 +189,24 @@ Indexdb_free (T *old) {
#endif
}
- FREE((*old)->offsetsmeta); /* Always ALLOCATED */
+ if ((*old)->offsetsmeta_access == ALLOCATED_PRIVATE) {
+ FREE((*old)->offsetsmeta);
+ } else if ((*old)->offsetsmeta_access == ALLOCATED_SHARED) {
+ Access_deallocate((*old)->offsetsmeta,(*old)->offsetsmeta_shmid);
+ } else {
+ /* Always ALLOCATED */
+ abort();
+ }
+
#ifdef LARGE_GENOMES
- FREE((*old)->offsetspages); /* Always ALLOCATED */
+ if ((*old)->offsetspages_access == ALLOCATED_PRIVATE) {
+ FREE((*old)->offsetspages);
+ } else if ((*old)->offsetspages_access == ALLOCATED_SHARED) {
+ Access_deallocate((*old)->offsetspages,(*old)->offsetspages_shmid);
+ } else {
+ /* Always ALLOCATED */
+ abort();
+ }
#endif
FREE(*old);
@@ -231,7 +260,7 @@ Indexdb_mean_size (T this, Mode_T mode, Width_T index1part) {
#endif
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED) {
+ if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
return (double) this->offsetsstrm[this->offsetsmeta[oligospace/this->blocksize]]/(double) n;
} else {
return (double) Bigendian_convert_uint(this->offsetsstrm[this->offsetsmeta[oligospace/this->blocksize]])/(double) n;
@@ -296,7 +325,7 @@ Indexdb_get_filenames_no_compression (Width_T *index1part, Width_T *index1interv
char *base_filename, *filename;
char *pattern, interval_char, digit_string[2], *p, *q;
- char tens, ones, ones0;
+ char tens, ones;
Width_T found_index1part, found_interval;
int rootlength, patternlength;
@@ -564,13 +593,13 @@ Indexdb_get_filenames_bitpack (Width_T *index1part, Width_T *index1interval,
Alphabet_T found_alphabet;
#else
char *pattern;
- char tens0, tens;
+ char tens;
#endif
char interval_char, digit_string[2], *p, *q;
Width_T found_index1part = 0, found_interval = 0;
int rootlength, patternlength;
- char ones0, ones;
+ char ones;
char *offsetspages_suffix, *offsetsmeta_suffix, *offsetsstrm_suffix,
*positions_high_suffix, *positions_low_suffix;
struct dirent *entry;
@@ -1000,14 +1029,64 @@ Indexdb_get_filenames (int *compression_type,
}
+void
+Indexdb_shmem_remove (char *genomesubdir, char *fileroot, char *idx_filesuffix, char *snps_root,
+#ifdef PMAP
+                      Alphabet_T *alphabet, int *alphabet_size, Alphabet_T required_alphabet,
+#endif
+                      Width_T required_index1part, Width_T required_interval, bool expand_offsets_p) {
+  Filenames_T filenames;
+  int index1part, index1interval;
+  /* Calls Access_shmem_remove on each file of the index that is located; does nothing if no index is found */
+  if ((filenames = Indexdb_get_filenames_no_compression(&index1part,&index1interval,
+                                                        genomesubdir,fileroot,idx_filesuffix,snps_root,
+                                                        required_interval,/*offsets_only_p*/false)) != NULL) {
+    /* Try non-compressed files */
+    Access_shmem_remove(filenames->offsets_filename);
+
+  } else if ((filenames = Indexdb_get_filenames_bitpack(
+#ifdef PMAP
+                                                         &(*alphabet),required_alphabet,
+#endif
+                                                         &index1part,&index1interval,
+                                                         genomesubdir,fileroot,idx_filesuffix,snps_root,
+                                                         required_index1part,required_interval,
+                                                         /*blocksize*/64,/*offsets_only_p*/false)) != NULL) {
+    if (expand_offsets_p == true) {
+      /* Expanded offsets are ALLOCATED_PRIVATE (see Indexdb_new_genome), so there is nothing shared to remove */
+
+    } else {
+      Access_shmem_remove(filenames->pointers_filename);
+      Access_shmem_remove(filenames->offsets_filename);
+#ifdef LARGE_GENOMES
+      if (filenames->pages_filename != NULL) {
+        Access_shmem_remove(filenames->pages_filename);
+      }
+#endif
+    }
+  } else {
+    /* Neither lookup found an index: filenames is NULL, so it must not be dereferenced or freed below */
+    return;
+  }
+
+#ifdef LARGE_GENOMES
+  Access_shmem_remove(filenames->positions_high_filename);
+#endif
+  Access_shmem_remove(filenames->positions_low_filename);
+
+  Filenames_free(&filenames);
+  return;
+}
+
+
T
Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
char *genomesubdir, char *fileroot, char *idx_filesuffix, char *snps_root,
#ifdef PMAP
Alphabet_T *alphabet, int *alphabet_size, Alphabet_T required_alphabet,
#endif
- Width_T required_index1part, Width_T required_interval,
- bool expand_offsets_p, Access_mode_T offsetsstrm_access, Access_mode_T positions_access) {
+ Width_T required_index1part, Width_T required_interval, bool expand_offsets_p,
+ Access_mode_T offsetsstrm_access, Access_mode_T positions_access, bool sharedp) {
T new = (T) MALLOC(sizeof(*new));
Filenames_T filenames;
Oligospace_T basespace, base;
@@ -1046,7 +1125,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
for (base = 0; base <= basespace; base++) {
new->offsetsmeta[base] = base;
}
-
+ new->offsetsmeta_access = ALLOCATED_PRIVATE;
if (offsetsstrm_access == USE_ALLOCATE) {
if (snps_root) {
@@ -1056,8 +1135,8 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
fprintf(stderr,"Allocating memory for %s offsets, kmer %d, interval %d...",
idx_filesuffix,new->index1part,new->index1interval);
}
- new->offsetsstrm = (UINT4 *) Access_allocated(&new->offsetsstrm_len,&seconds,
- filenames->offsets_filename,sizeof(UINT4));
+ new->offsetsstrm = (UINT4 *) Access_allocate(&new->offsetsstrm_shmid,&new->offsetsstrm_len,&seconds,
+ filenames->offsets_filename,sizeof(UINT4),sharedp);
if (new->offsetsstrm == NULL) {
fprintf(stderr,"insufficient memory (need to use a lower batch mode (-B))\n");
exit(9);
@@ -1065,7 +1144,11 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
comma = Genomicpos_commafmt(new->offsetsstrm_len);
fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma,seconds);
FREE(comma);
- new->offsetsstrm_access = ALLOCATED;
+ if (sharedp == true) {
+ new->offsetsstrm_access = ALLOCATED_SHARED;
+ } else {
+ new->offsetsstrm_access = ALLOCATED_PRIVATE;
+ }
}
#ifdef HAVE_MMAP
@@ -1150,6 +1233,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
for (base = 0; base <= basespace; base++) {
new->offsetsmeta[base] = base;
}
+ new->offsetsmeta_access = ALLOCATED_PRIVATE;
#ifdef PMAP
new->offsetsstrm = Indexdb_offsets_from_bitpack(filenames->pointers_filename,filenames->offsets_filename,
@@ -1158,7 +1242,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
new->offsetsstrm = Indexdb_offsets_from_bitpack(filenames->pointers_filename,filenames->offsets_filename,
new->index1part);
#endif
- new->offsetsstrm_access = ALLOCATED;
+ new->offsetsstrm_access = ALLOCATED_PRIVATE;
#endif
@@ -1200,8 +1284,13 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
fprintf(stderr,"Allocating memory for %s offset pointers, kmer %d, interval %d...",
idx_filesuffix,new->index1part,new->index1interval);
}
- new->offsetsmeta = (UINT4 *) Access_allocated(&new->offsetsmeta_len,&seconds,
- filenames->pointers_filename,sizeof(UINT4));
+ new->offsetsmeta = (UINT4 *) Access_allocate(&new->offsetsmeta_shmid,&new->offsetsmeta_len,&seconds,
+ filenames->pointers_filename,sizeof(UINT4),sharedp);
+ if (sharedp == true) {
+ new->offsetsmeta_access = ALLOCATED_SHARED;
+ } else {
+ new->offsetsmeta_access = ALLOCATED_PRIVATE;
+ }
comma = Genomicpos_commafmt(new->offsetsmeta_len);
fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma,seconds);
@@ -1216,8 +1305,8 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
fprintf(stderr,"Allocating memory for %s offsets, kmer %d, interval %d...",
idx_filesuffix,new->index1part,new->index1interval);
}
- new->offsetsstrm = (UINT4 *) Access_allocated(&new->offsetsstrm_len,&seconds,
- filenames->offsets_filename,sizeof(UINT4));
+ new->offsetsstrm = (UINT4 *) Access_allocate(&new->offsetsstrm_shmid,&new->offsetsstrm_len,&seconds,
+ filenames->offsets_filename,sizeof(UINT4),sharedp);
if (new->offsetsstrm == NULL) {
fprintf(stderr,"insufficient memory (need to use a lower batch mode (-B))\n");
exit(9);
@@ -1225,7 +1314,11 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
comma = Genomicpos_commafmt(new->offsetsstrm_len);
fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma,seconds);
FREE(comma);
- new->offsetsstrm_access = ALLOCATED;
+ if (sharedp == true) {
+ new->offsetsstrm_access = ALLOCATED_SHARED;
+ } else {
+ new->offsetsstrm_access = ALLOCATED_PRIVATE;
+ }
}
#ifdef HAVE_MMAP
@@ -1238,7 +1331,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
idx_filesuffix,new->index1part,new->index1interval);
}
new->offsetsstrm = (UINT4 *) Access_mmap_and_preload(&new->offsetsstrm_fd,&new->offsetsstrm_len,&npages,&seconds,
- filenames->offsets_filename,sizeof(UINT4));
+ filenames->offsets_filename,sizeof(UINT4));
if (new->offsetsstrm == NULL) {
fprintf(stderr,"insufficient memory (will use disk file instead, but program may not run)\n");
#ifdef PMAP
@@ -1255,7 +1348,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
} else if (offsetsstrm_access == USE_MMAP_ONLY) {
new->offsetsstrm = (UINT4 *) Access_mmap(&new->offsetsstrm_fd,&new->offsetsstrm_len,
- filenames->offsets_filename,sizeof(UINT4),/*randomp*/false);
+ filenames->offsets_filename,sizeof(UINT4),/*randomp*/false);
if (new->offsetsstrm == NULL) {
fprintf(stderr,"Insufficient memory for mmap of %s (will use disk file instead, but program may not run)\n",
filenames->offsets_filename);
@@ -1283,8 +1376,14 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
/* Sanity check on positions filesize */
#ifdef LARGE_GENOMES
if (filenames->pages_filename != NULL) {
- new->offsetspages = (UINT4 *) Access_allocated(&offsetspages_len,&seconds,filenames->pages_filename,sizeof(UINT4));
+ new->offsetspages = (UINT4 *) Access_allocate(&new->offsetspages_shmid,&offsetspages_len,&seconds,filenames->pages_filename,sizeof(UINT4),sharedp);
+ if (sharedp == true) {
+ new->offsetspages_access = ALLOCATED_SHARED;
+ } else {
+ new->offsetspages_access = ALLOCATED_PRIVATE;
+ }
} else {
+ new->offsetspages_access = ALLOCATED_PRIVATE;
new->offsetspages = (UINT4 *) MALLOC(1*sizeof(UINT4));
new->offsetspages[0] = -1U;
}
@@ -1329,8 +1428,8 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
idx_filesuffix,new->index1part,new->index1interval);
}
#ifdef LARGE_GENOMES
- new->positions_high = (unsigned char *) Access_allocated(&new->positions_high_len,&seconds,
- filenames->positions_high_filename,sizeof(unsigned char));
+ new->positions_high = (unsigned char *) Access_allocate(&new->positions_high_shmid,&new->positions_high_len,&seconds,
+ filenames->positions_high_filename,sizeof(unsigned char),sharedp);
if (new->positions_high == NULL) {
fprintf(stderr,"insufficient memory (need to use a lower batch mode (-B)\n");
exit(9);
@@ -1339,8 +1438,8 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
fprintf(stderr,"done (%s bytes, %.2f sec), ",comma,seconds);
FREE(comma);
- new->positions_low = (UINT4 *) Access_allocated(&new->positions_low_len,&seconds,
- filenames->positions_low_filename,sizeof(UINT4));
+ new->positions_low = (UINT4 *) Access_allocate(&new->positions_low_shmid,&new->positions_low_len,&seconds,
+ filenames->positions_low_filename,sizeof(UINT4),sharedp);
if (new->positions_low == NULL) {
fprintf(stderr,"insufficient memory (need to use a lower batch mode (-B)\n");
exit(9);
@@ -1349,12 +1448,16 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma,seconds);
FREE(comma);
- new->positions_access = ALLOCATED;
+ if (sharedp == true) {
+ new->positions_access = ALLOCATED_SHARED;
+ } else {
+ new->positions_access = ALLOCATED_PRIVATE;
+ }
}
}
#else
- new->positions = (UINT4 *) Access_allocated(&new->positions_len,&seconds,
- filenames->positions_low_filename,sizeof(UINT4));
+ new->positions = (UINT4 *) Access_allocate(&new->positions_shmid,&new->positions_len,&seconds,
+ filenames->positions_low_filename,sizeof(UINT4),sharedp);
if (new->positions == NULL) {
fprintf(stderr,"insufficient memory (need to use a lower batch mode (-B)\n");
exit(9);
@@ -1362,7 +1465,11 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
comma = Genomicpos_commafmt(new->positions_len);
fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma,seconds);
FREE(comma);
- new->positions_access = ALLOCATED;
+ if (sharedp == true) {
+ new->positions_access = ALLOCATED_SHARED;
+ } else {
+ new->positions_access = ALLOCATED_PRIVATE;
+ }
}
#endif
@@ -1695,13 +1802,16 @@ positions_read_backward (int positions_fd) {
/* Used by non-utility programs */
Positionsptr_T *
-Indexdb_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
+Indexdb_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
#ifdef PMAP
int alphabet_size, Width_T index1part_aa
#else
Width_T index1part
#endif
) {
+#ifndef HAVE_MMAP
+ int shmid;
+#endif
UINT4 *offsetsmeta;
UINT4 *offsetsstrm;
int offsetsmeta_fd, offsetsstrm_fd;
@@ -1725,8 +1835,8 @@ Indexdb_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
offsetsmeta = (UINT4 *) Access_mmap(&offsetsmeta_fd,&offsetsmeta_len,offsetsmetafile,sizeof(UINT4),/*randomp*/false);
offsetsstrm = (UINT4 *) Access_mmap(&offsetsstrm_fd,&offsetsstrm_len,offsetsstrmfile,sizeof(UINT4),/*randomp*/false);
#else
- offsetsmeta = (UINT4 *) Access_allocated(&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4));
- offsetsstrm = (UINT4 *) Access_allocated(&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4));
+ offsetsmeta = (UINT4 *) Access_allocate(&shmid,&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4),/*sharedp*/false);
+ offsetsstrm = (UINT4 *) Access_allocate(&shmid,&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4),/*sharedp*/false);
#endif
#ifdef OLIGOSPACE_NOT_LONG
@@ -1786,7 +1896,7 @@ Indexdb_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
#if defined(HAVE_64_BIT) && defined(UTILITYP)
-/* Used by utility programs */
+/* Used by utility programs for writing indexdb */
Hugepositionsptr_T *
Indexdb_offsets_from_bitpack_huge (char *offsetspagesfile, char *offsetsmetafile, char *offsetsstrmfile,
#ifdef PMAP
@@ -1798,6 +1908,8 @@ Indexdb_offsets_from_bitpack_huge (char *offsetspagesfile, char *offsetsmetafile
UINT4 *offsetspages;
UINT4 *offsetsmeta;
UINT4 *offsetsstrm;
+
+ int shmid;
int offsetsmeta_fd, offsetsstrm_fd;
size_t offsetspages_len, offsetsmeta_len, offsetsstrm_len;
Hugepositionsptr_T *offsets = NULL;
@@ -1814,22 +1926,21 @@ Indexdb_offsets_from_bitpack_huge (char *offsetspagesfile, char *offsetsmetafile
#endif
if (blocksize == 1) {
- return (Hugepositionsptr_T *) Access_allocated(&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(Hugepositionsptr_T));
+ return (Hugepositionsptr_T *) Access_allocate(&shmid,&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(Hugepositionsptr_T),/*sharedp*/false);
} else {
-
if (offsetspagesfile == NULL) {
offsetspages = (UINT4 *) MALLOC(1*sizeof(UINT4));
offsetspages[0] = -1U;
} else {
- offsetspages = (UINT4 *) Access_allocated(&offsetspages_len,&seconds,offsetspagesfile,sizeof(UINT4));
+ offsetspages = (UINT4 *) Access_allocate(&shmid,&offsetspages_len,&seconds,offsetspagesfile,sizeof(UINT4),/*sharedp*/false);
}
#ifdef HAVE_MMAP
offsetsmeta = (UINT4 *) Access_mmap(&offsetsmeta_fd,&offsetsmeta_len,offsetsmetafile,sizeof(UINT4),/*randomp*/false);
offsetsstrm = (UINT4 *) Access_mmap(&offsetsstrm_fd,&offsetsstrm_len,offsetsstrmfile,sizeof(UINT4),/*randomp*/false);
#else
- offsetsmeta = (UINT4 *) Access_allocated(&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4));
- offsetsstrm = (UINT4 *) Access_allocated(&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4));
+ offsetsmeta = (UINT4 *) Access_allocate(&shmid,&offsetsmeta_len,&seconds,offsetsmetafile,sizeof(UINT4),/*sharedp*/false);
+ offsetsstrm = (UINT4 *) Access_allocate(&shmid,&offsetsstrm_len,&seconds,offsetsstrmfile,sizeof(UINT4),/*sharedp*/false);
#endif
#ifdef OLIGOSPACE_NOT_LONG
@@ -1902,7 +2013,7 @@ Indexdb_read (int *nentries, T this, Storedoligomer_T aaindex) {
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED) {
+ if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
ptr0 = this->offsetsstrm[aaindex];
end0 = this->offsetsstrm[aaindex+1];
} else {
@@ -1965,7 +2076,7 @@ Indexdb_read (int *nentries, T this, Storedoligomer_T aaindex) {
pthread_mutex_unlock(&this->positions_read_mutex);
#endif
- } else if (this->positions_access == ALLOCATED) {
+ } else if (this->positions_access == ALLOCATED_PRIVATE || this->positions_access == ALLOCATED_SHARED) {
#ifdef LARGE_GENOMES
positions_copy_multiple_large(positions,&(this->positions_high[ptr0]),&(this->positions_low[ptr0]),*nentries);
#else
@@ -2057,7 +2168,7 @@ Indexdb_read (int *nentries, T this, Storedoligomer_T oligo) {
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED) {
+ if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
ptr0 = this->offsetsstrm[part0];
end0 = this->offsetsstrm[part0+1];
} else {
@@ -2123,7 +2234,7 @@ Indexdb_read (int *nentries, T this, Storedoligomer_T oligo) {
#ifdef HAVE_PTHREAD
pthread_mutex_unlock(&this->positions_read_mutex);
#endif
- } else if (this->positions_access == ALLOCATED) {
+ } else if (this->positions_access == ALLOCATED_PRIVATE || this->positions_access == ALLOCATED_SHARED) {
#ifdef LARGE_GENOMES
positions_copy_multiple_large(positions,&(this->positions_high[ptr0]),&(this->positions_low[ptr0]),*nentries);
#else
@@ -2210,7 +2321,7 @@ Indexdb_read_inplace (int *nentries,
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED) {
+ if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
ptr0 = this->offsetsstrm[part0];
end0 = this->offsetsstrm[part0+1];
} else {
@@ -2283,7 +2394,7 @@ Indexdb_read_with_diagterm (int *nentries, T this, Storedoligomer_T oligo, int d
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED) {
+ if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
ptr0 = this->offsetsstrm[oligo];
end0 = this->offsetsstrm[oligo+1];
} else {
@@ -2328,7 +2439,7 @@ Indexdb_read_with_diagterm (int *nentries, T this, Storedoligomer_T oligo, int d
pthread_mutex_unlock(&this->positions_read_mutex);
#endif
- } else if (this->positions_access == ALLOCATED) {
+ } else if (this->positions_access == ALLOCATED_PRIVATE || this->positions_access == ALLOCATED_SHARED) {
#ifdef LARGE_GENOMES
for (ptr = ptr0, i = 0; ptr < end0; ptr++) {
positions[i++] = ((Univcoord_T) this->positions_high[ptr] << 32) + this->positions_low[ptr] + diagterm;
@@ -2387,7 +2498,7 @@ Indexdb_read_with_diagterm_sizelimit (int *nentries, T this, Storedoligomer_T ol
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED) {
+ if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
ptr0 = this->offsetsstrm[oligo];
end0 = this->offsetsstrm[oligo+1];
} else {
@@ -2439,7 +2550,7 @@ Indexdb_read_with_diagterm_sizelimit (int *nentries, T this, Storedoligomer_T ol
pthread_mutex_unlock(&this->positions_read_mutex);
#endif
- } else if (this->positions_access == ALLOCATED) {
+ } else if (this->positions_access == ALLOCATED_PRIVATE || this->positions_access == ALLOCATED_SHARED) {
#ifdef LARGE_GENOMES
for (ptr = ptr0, i = 0; ptr < end0; ptr++) {
positions[i++] = ((Univcoord_T) this->positions_high[ptr] << 32) + this->positions_low[ptr] + diagterm;
@@ -2540,8 +2651,10 @@ Indexdb_new_segment (char *genomicseg,
for (oligoi = 0; oligoi <= oligospace; oligoi++) {
new->offsetsmeta[oligoi] = oligoi;
}
+ new->offsetsmeta_access = ALLOCATED_PRIVATE;
new->offsetsstrm = (UINT4 *) CALLOC(oligospace+1,sizeof(UINT4));
+ new->offsetsstrm_access = ALLOCATED_PRIVATE;
p = genomicseg;
while ((c = *(p++)) != '\0') {
@@ -2677,7 +2790,7 @@ Indexdb_new_segment (char *genomicseg,
exit(9);
}
new->positions = (Univcoord_T *) CALLOC(totalcounts,sizeof(Univcoord_T));
- new->positions_access = ALLOCATED;
+ new->positions_access = ALLOCATED_PRIVATE;
p = genomicseg;
while ((c = *(p++)) != '\0') {
diff --git a/src/indexdb.h b/src/indexdb.h
index 9ee1f6b..fc83843 100644
--- a/src/indexdb.h
+++ b/src/indexdb.h
@@ -1,4 +1,4 @@
-/* $Id: indexdb.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: indexdb.h 161940 2015-03-25 20:36:59Z twu $ */
#ifndef INDEXDB_INCLUDED
#define INDEXDB_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -131,6 +131,13 @@ Indexdb_offsets_from_bitpack_huge (char *bitpackpagesfile, char *offsetsmetafile
);
#endif
+extern void
+Indexdb_shmem_remove (char *genomesubdir, char *fileroot, char *idx_filesuffix, char *snps_root,
+#ifdef PMAP
+ Alphabet_T *alphabet, int *alphabet_size, Alphabet_T required_alphabet,
+#endif
+ Width_T required_index1part, Width_T required_interval, bool expand_offsets_p);
+
extern T
Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
char *genomesubdir, char *fileroot, char *idx_filesuffix, char *snps_root,
@@ -138,7 +145,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
Alphabet_T *alphabet, int *alphabet_size, Alphabet_T required_alphabet,
#endif
Width_T required_index1part, Width_T required_interval, bool expand_offsets_p,
- Access_mode_T offsetsstrm_access, Access_mode_T positions_access);
+ Access_mode_T offsetsstrm_access, Access_mode_T positions_access, bool sharedp);
#ifndef UTILITYP
extern T
Indexdb_new_segment (char *genomicseg,
diff --git a/src/indexdb_hr.h b/src/indexdb_hr.h
index 365107a..f8224ff 100644
--- a/src/indexdb_hr.h
+++ b/src/indexdb_hr.h
@@ -1,6 +1,7 @@
-/* $Id: indexdb_hr.h 99737 2013-06-27 19:33:03Z twu $ */
+/* $Id: indexdb_hr.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef INDEXDB_HR_INCLUDED
#define INDEXDB_HR_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "reader.h"
diff --git a/src/indexdbdef.h b/src/indexdbdef.h
index cef7911..cfc8d5c 100644
--- a/src/indexdbdef.h
+++ b/src/indexdbdef.h
@@ -1,4 +1,4 @@
-/* $Id: indexdbdef.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: indexdbdef.h 161940 2015-03-25 20:36:59Z twu $ */
#ifndef INDEXDBDEF_INCLUDED
#define INDEXDBDEF_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -34,27 +34,37 @@ struct T {
Blocksize_T blocksize; /* e.g., 64 = 4^(15-12) */
#ifdef LARGE_GENOMES
+ Access_T offsetspages_access;
+ int offsetspages_shmid;
UINT4 *offsetspages;
#endif
+ Access_T offsetsmeta_access;
+ int offsetsmeta_shmid;
int offsetsmeta_fd;
size_t offsetsmeta_len;
UINT4 *offsetsmeta;
Access_T offsetsstrm_access;
+ int offsetsstrm_shmid;
int offsetsstrm_fd;
size_t offsetsstrm_len;
UINT4 *offsetsstrm;
Access_T positions_access;
#ifdef LARGE_GENOMES
+ int positions_high_shmid;
int positions_high_fd;
size_t positions_high_len;
+
+ int positions_low_shmid;
int positions_low_fd;
size_t positions_low_len;
+
unsigned char *positions_high;
UINT4 *positions_low;
#else
+ int positions_shmid;
int positions_fd;
size_t positions_len;
UINT4 *positions; /* For small genomes, same as Univcoord_T */
diff --git a/src/interval.h b/src/interval.h
index bcef35a..8e0bede 100644
--- a/src/interval.h
+++ b/src/interval.h
@@ -1,6 +1,7 @@
-/* $Id: interval.h 135351 2014-05-07 15:56:14Z twu $ */
+/* $Id: interval.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef INTERVAL_INCLUDED
#define INTERVAL_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "types.h"
diff --git a/src/intlist.c b/src/intlist.c
index 0734cdc..ea443a4 100644
--- a/src/intlist.c
+++ b/src/intlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: intlist.c 145990 2014-08-25 21:47:32Z twu $";
+static char rcsid[] = "$Id: intlist.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -138,6 +138,48 @@ Intlist_max (T list) {
return m;
}
+int
+Intlist_min (T list) {
+  T p;
+  int smallest;
+
+  /* An empty list yields 0 */
+  if (list == NULL) {
+    return 0;
+  }
+
+  /* Seed with the head value, then scan the remainder for anything smaller */
+  smallest = list->first;
+  for (p = list->rest; p != NULL; p = p->rest) {
+    if (p->first < smallest) {
+      smallest = p->first;
+    }
+  }
+
+  return smallest;
+}
+
+bool
+Intlist_vary (T list) {
+  T p;
+  int head_value;
+
+  /* An empty list cannot contain differing values */
+  if (list == NULL) {
+    return false;
+  }
+
+  /* The list varies as soon as any later element differs from the head */
+  head_value = list->first;
+  for (p = list->rest; p != NULL; p = p->rest) {
+    if (p->first != head_value) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
bool
Intlist_exists_p (T list, int x) {
while (list) {
diff --git a/src/intlist.h b/src/intlist.h
index 647de2f..16851b5 100644
--- a/src/intlist.h
+++ b/src/intlist.h
@@ -1,6 +1,7 @@
-/* $Id: intlist.h 145990 2014-08-25 21:47:32Z twu $ */
+/* $Id: intlist.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef INTLIST_INCLUDED
#define INTLIST_INCLUDED
+
#include "bool.h"
#define T Intlist_T
@@ -32,6 +33,10 @@ extern int
Intlist_length (T list);
extern int
Intlist_max (T list);
+extern int
+Intlist_min (T list);
+extern bool
+Intlist_vary (T list);
extern bool
Intlist_exists_p (T list, int x);
extern int *
diff --git a/src/intron.h b/src/intron.h
index 1ca0e2f..24b4541 100644
--- a/src/intron.h
+++ b/src/intron.h
@@ -1,6 +1,7 @@
-/* $Id: intron.h 99737 2013-06-27 19:33:03Z twu $ */
+/* $Id: intron.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef INTRON_INCLUDED
#define INTRON_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "iit-read.h"
diff --git a/src/junction.c b/src/junction.c
new file mode 100644
index 0000000..f59feb4
--- /dev/null
+++ b/src/junction.c
@@ -0,0 +1,240 @@
+static char rcsid[] = "$Id: junction.c 166641 2015-05-29 21:13:04Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "junction.h"
+#include "mem.h"
+#include "complement.h"
+
+
+#define T Junction_T
+struct T {
+ Junctiontype_T type; /* Kind of junction; assigned by the Junction_new_* constructor used */
+ int nindels; /* Set by the insertion and deletion constructors; 0 for splices and chimeras */
+ Univcoord_T deletionpos; /* Set only by Junction_new_deletion; position handed to Genome_fill_buffer_simple */
+ /* Splice fields.  Junction_print reports a splice_distance of 0 as an ambiguous splice. */
+ Chrpos_T splice_distance;
+ int sensedir;
+ double donor_prob;
+ double acceptor_prob;
+};
+/* Debugging aid: writes a one-line description of the junction to stdout.
+   Junction types other than insertion, deletion and splice print nothing. */
+void
+Junction_print (T this) {
+  if (this == NULL) {
+    printf("No junction\n");
+  } else if (this->type == INS_JUNCTION) {
+    printf("Insertion of %d\n",this->nindels);
+  } else if (this->type == DEL_JUNCTION) {
+    printf("Deletion of %d at %llu\n",this->nindels,(unsigned long long) this->deletionpos); /* Univcoord_T may exceed 32 bits */
+  } else if (this->type == SPLICE_JUNCTION) {
+    if (this->splice_distance == 0) {
+      printf("Splice ambiguous with sense %d, prob %f and %f\n",
+             this->sensedir,this->donor_prob,this->acceptor_prob);
+    } else {
+      printf("Splice with sense %d of %u, prob %f and %f\n",
+             this->sensedir,this->splice_distance,this->donor_prob,this->acceptor_prob);
+    }
+  }
+  return;
+}
+/* Releases the junction that *old refers to, using the FREE macro on the caller's handle */
+void
+Junction_free (T *old) {
+ FREE(*old);
+ return;
+}
+/* Frees every Junction_T stored in *list, then releases the list itself */
+void
+Junction_gc (List_T *list) {
+  List_T cell = *list;
+  T junction;
+  while (cell != NULL) {
+    junction = (T) List_head(cell);
+    Junction_free(&junction);
+    cell = List_next(cell);
+  }
+  List_free(list);
+  return;
+}
+/* Allocates a junction of type INS_JUNCTION that records nindels */
+T
+Junction_new_insertion (int nindels) {
+ T new = (T) MALLOC(sizeof(*new));
+ /* Indel fields; an insertion carries no deletion position */
+ new->type = INS_JUNCTION;
+ new->nindels = nindels;
+ new->deletionpos = 0;
+ /* Splice fields are zeroed for an insertion */
+ new->splice_distance = 0;
+ new->sensedir = 0;
+ new->donor_prob = 0.0;
+ new->acceptor_prob = 0.0;
+
+ return new;
+}
+/* Allocates a junction of type DEL_JUNCTION that records nindels and deletionpos */
+T
+Junction_new_deletion (int nindels, Univcoord_T deletionpos) {
+ T new = (T) MALLOC(sizeof(*new));
+ /* Indel fields; deletionpos is later read by Junction_deletion_string */
+ new->type = DEL_JUNCTION;
+ new->nindels = nindels;
+ new->deletionpos = deletionpos;
+ /* Splice fields are zeroed for a deletion */
+ new->splice_distance = 0;
+ new->sensedir = 0;
+ new->donor_prob = 0.0;
+ new->acceptor_prob = 0.0;
+
+ return new;
+}
+/* Allocates a junction of type SPLICE_JUNCTION with the given distance, sense direction and site probabilities */
+T
+Junction_new_splice (Chrpos_T splice_distance, int sensedir, double donor_prob, double acceptor_prob) {
+ T new = (T) MALLOC(sizeof(*new));
+ /* Indel fields are zeroed for a splice */
+ new->type = SPLICE_JUNCTION;
+ new->nindels = 0;
+ new->deletionpos = 0;
+ /* A splice_distance of 0 is what Junction_print reports as an ambiguous splice */
+ new->splice_distance = splice_distance;
+ new->sensedir = sensedir;
+ new->donor_prob = donor_prob;
+ new->acceptor_prob = acceptor_prob;
+
+ return new;
+}
+
+/* Allocates a junction of type CHIMERA_JUNCTION with the given sense direction and site probabilities */
+T
+Junction_new_chimera (int sensedir, double donor_prob, double acceptor_prob) {
+  T new = (T) MALLOC(sizeof(*new));
+  /* Indel fields are zeroed for a chimera */
+  new->type = CHIMERA_JUNCTION;
+  new->nindels = 0;
+  new->deletionpos = 0;
+  /* Store the caller's sensedir (it was previously discarded and forced to 0); no splice distance applies */
+  new->splice_distance = 0;
+  new->sensedir = sensedir;
+  new->donor_prob = donor_prob;
+  new->acceptor_prob = acceptor_prob;
+
+  return new;
+}
+/* Returns a newly allocated junction whose members all equal those of old.
+   The result comes from MALLOC and is independent of old. */
+T
+Junction_copy (T old) {
+  T new;
+
+  new = (T) MALLOC(sizeof(*new));
+
+  /* Whole-structure assignment transfers every member at once: type,
+     nindels, deletionpos, splice_distance, sensedir, donor_prob and
+     acceptor_prob.  None of them is a pointer, so nothing is shared. */
+  *new = *old;
+
+  return new;
+}
+
+
+
+
+/* Accessor: the junction's type field */
+Junctiontype_T
+Junction_type (T this) {
+ return this->type;
+}
+/* Returns the sum of the donor and acceptor probabilities (not their product or average) */
+double
+Junction_prob (T this) {
+ return this->donor_prob + this->acceptor_prob;
+}
+/* Accessor: the junction's sensedir field */
+int
+Junction_sensedir (T this) {
+ return this->sensedir;
+}
+/* Accessor: the junction's donor_prob field */
+double
+Junction_donor_prob (T this) {
+ return this->donor_prob;
+}
+/* Accessor: the junction's acceptor_prob field */
+double
+Junction_acceptor_prob (T this) {
+ return this->acceptor_prob;
+}
+
+/* Accessor: the junction's nindels field (0 for junctions built as splices or chimeras) */
+int
+Junction_nindels (T this) {
+ return this->nindels;
+}
+/* Returns +nindels for a deletion, -nindels for an insertion, and 0 for every other junction type */
+int
+Junction_adj (T this) {
+  switch (this->type) {
+  case DEL_JUNCTION:
+    return +this->nindels;
+  case INS_JUNCTION:
+    return -this->nindels;
+  default: return 0;
+  }
+}
+
+
+
+/* Character translation table, initialized from COMPLEMENT_LC (defined outside this file) */
+static char complCode[128] = COMPLEMENT_LC;
+/* Reverses sequence[0..length-1] in place, mapping each character through complCode; returns sequence */
+static char *
+make_complement_inplace (char *sequence, unsigned int length) {
+ char temp;
+ unsigned int i, j;
+
+ for (i = 0, j = length-1; i < length/2; i++, j--) { /* i and j walk inward from the two ends */
+ temp = complCode[(int) sequence[i]]; /* NOTE(review): indexes a 128-entry table, so assumes characters in 0..127 -- confirm */
+ sequence[i] = complCode[(int) sequence[j]];
+ sequence[j] = temp;
+ }
+ if (i == j) { /* Odd length: the middle character is translated but stays in place */
+ sequence[i] = complCode[(int) sequence[i]];
+ }
+
+ return sequence;
+}
+/* Returns a newly MALLOC'd buffer filled by Genome_fill_buffer_simple(genome,deletionpos,nindels); run through make_complement_inplace when plusp is false */
+char *
+Junction_deletion_string (T this, Genome_T genome, bool plusp) {
+ char *deletion_string;
+ /* One byte more than nindels is allocated -- presumably for a terminator written by Genome_fill_buffer_simple; TODO confirm */
+ deletion_string = (char *) MALLOC((this->nindels+1)*sizeof(char));
+ /* printf("deletionpos = %u\n",this->deletionpos); */
+ Genome_fill_buffer_simple(genome,this->deletionpos,this->nindels,deletion_string);
+ if (plusp == false) {
+ make_complement_inplace(deletion_string,this->nindels);
+ }
+
+ return deletion_string;
+}
+
+/* Accessor: the junction's splice_distance field */
+Chrpos_T
+Junction_splice_distance (T this) {
+ return this->splice_distance;
+}
+/* Overwrites the splice distance and both site probabilities of an existing junction */
+void
+Junction_set_unambiguous (T this, Chrpos_T distance, double donor_prob, double acceptor_prob) {
+ this->splice_distance = distance;
+ this->donor_prob = donor_prob;
+ this->acceptor_prob = acceptor_prob;
+ /* type, sensedir and the indel fields are left as they were */
+ return;
+}
+
+
diff --git a/src/junction.h b/src/junction.h
new file mode 100644
index 0000000..46cffe6
--- /dev/null
+++ b/src/junction.h
@@ -0,0 +1,63 @@
+/* $Id: junction.h 166641 2015-05-29 21:13:04Z twu $ */
+#ifndef JUNCTION_INCLUDED
+#define JUNCTION_INCLUDED
+/* Junction kinds.  junction.c provides constructors for the INS, DEL, SPLICE and CHIMERA kinds. */
+typedef enum {NO_JUNCTION, INS_JUNCTION, DEL_JUNCTION, SPLICE_JUNCTION,
+ CHIMERA_JUNCTION, AMB_JUNCTION, END_JUNCTION} Junctiontype_T;
+
+#include "types.h"
+#include "genomicpos.h"
+#include "bool.h"
+#include "genome.h"
+#include "list.h"
+
+
+#define T Junction_T
+typedef struct T *T;
+
+extern void
+Junction_print (T this);
+extern void
+Junction_free (T *old);
+extern void
+Junction_gc (List_T *list);
+
+extern T
+Junction_new_insertion (int nindels);
+extern T
+Junction_new_deletion (int nindels, Univcoord_T deletionpos);
+extern T
+Junction_new_splice (Chrpos_T splice_distance, int sensedir, double donor_prob, double acceptor_prob);
+
+extern T
+Junction_new_chimera (int sensedir, double donor_prob, double acceptor_prob);
+
+extern T
+Junction_copy (T old);
+
+
+extern Junctiontype_T
+Junction_type (T this);
+extern int
+Junction_sensedir (T this);
+extern double
+Junction_prob (T this);
+extern double
+Junction_donor_prob (T this);
+extern double
+Junction_acceptor_prob (T this);
+
+extern int
+Junction_nindels (T this);
+extern int
+Junction_adj (T this);
+extern char *
+Junction_deletion_string (T this, Genome_T genome, bool plusp);
+extern Chrpos_T
+Junction_splice_distance (T this);
+extern void
+Junction_set_unambiguous (T this, Chrpos_T distance, double donor_prob, double acceptor_prob);
+
+#undef T
+#endif
+
diff --git a/src/list.c b/src/list.c
index 81739fa..02e7eb7 100644
--- a/src/list.c
+++ b/src/list.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: list.c 161598 2015-03-21 02:37:54Z twu $";
+static char rcsid[] = "$Id: list.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -309,6 +309,16 @@ List_last_value (T this) {
return last->first;
}
+T
+List_last_item (T this) {
+  T cell = this;
+  /* Advance to the cell that has no successor; an empty (NULL) list is returned as NULL */
+  while (cell != NULL && cell->rest != NULL) {
+    cell = cell->rest;
+  }
+  return cell;
+}
+
void *
List_index (T this, int index) {
while (index-- > 0) {
diff --git a/src/list.h b/src/list.h
index c8b581e..7bd93ca 100644
--- a/src/list.h
+++ b/src/list.h
@@ -1,4 +1,4 @@
-/* $Id: list.h 161598 2015-03-21 02:37:54Z twu $ */
+/* $Id: list.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef LIST_INCLUDED
#define LIST_INCLUDED
@@ -33,6 +33,8 @@ List_dump (T list);
extern T List_append (T list, T tail);
extern void *
List_last_value (T this);
+extern T
+List_last_item (T this);
extern void *
List_index (T this, int index);
extern T
diff --git a/src/littleendian.h b/src/littleendian.h
index 7e6cd90..b28d150 100644
--- a/src/littleendian.h
+++ b/src/littleendian.h
@@ -1,8 +1,8 @@
-/* $Id: littleendian.h 115892 2013-11-20 22:52:31Z twu $ */
+/* $Id: littleendian.h 157223 2015-01-22 18:43:01Z twu $ */
#ifndef LITTLEENDIAN_INCLUDED
#define LITTLEENDIAN_INCLUDED
#ifdef HAVE_CONFIG_H
-#include <config.h>
+#include <config.h> /* For HAVE_64_BIT */
#endif
#include <stdio.h>
diff --git a/src/master.c b/src/master.c
new file mode 100644
index 0000000..d4a93a8
--- /dev/null
+++ b/src/master.c
@@ -0,0 +1,510 @@
+static char rcsid[] = "$Id: master.c 162088 2015-03-26 18:29:04Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "master.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef HAVE_PTHREAD
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h> /* Needed to define pthread_t on Solaris */
+#endif
+#include <pthread.h>
+#endif
+
+#include "mem.h"
+
+#ifdef USE_MPI
+#include "filestring.h"
+#endif
+#ifdef GSNAP
+#include "shortread.h"
+#endif
+
+
+#ifdef DEBUG
+#define debug(x) x
+#else
+#define debug(x)
+#endif
+
+
+/* One queued description of a block of input.  Nodes form a singly
+   linked FIFO: RRlist_push appends at the tail and RRlist_pop removes
+   from the head.  The values are filled in by Master_parser. */
+typedef struct RRlist_T *RRlist_T;
+struct RRlist_T {
+ int nextchar_start; /* Value of nextchar before the block was read */
+ int nextchar; /* Value of nextchar after the block was read */
+ int offset_start_1; /* Offset passed to Shortread_read before the block (presumably first input -- confirm) */
+ int offset_start_2; /* Same, for the second offset (presumably second input -- confirm) */
+ int offset_end_1; /* First offset as left by Shortread_read after the block */
+ int offset_end_2; /* Second offset as left by Shortread_read after the block */
+ Filestring_T filestring1; /* Filestring created for this block; handed to Shortread_read under USE_MPI */
+ Filestring_T filestring2; /* Companion Filestring for this block */
+ bool donep; /* True once Shortread_read has returned NULL */
+
+ RRlist_T next; /* Next node in the queue, or NULL at the tail */
+};
+
+
+#ifdef DEBUG
+/* Debugging aid: prints the head pointer, every node reachable from
+   head (its address, both offset ranges and its next pointer), and the
+   tail pointer to stdout.  Compiled only when DEBUG is defined. */
+static void
+RRlist_dump (RRlist_T head, RRlist_T tail) {
+  RRlist_T this;
+
+  /* The %p conversion requires an argument of type void *, so every
+     pointer is cast explicitly */
+  fprintf(stdout,"head %p\n",(void *) head);
+  for (this = head; this != NULL; this = this->next) {
+    fprintf(stdout,"%p: offsets %d..%d and %d..%d, next %p\n",
+            (void *) this,this->offset_start_1,this->offset_end_1,
+            this->offset_start_2,this->offset_end_2,(void *) this->next);
+  }
+  fprintf(stdout,"tail %p\n",(void *) tail);
+  fprintf(stdout,"\n");
+  return;
+}
+#endif
+
+
+/* Appends a node describing one block of input to the queue.  *head is
+   updated when the queue was empty.  Returns the new node, which the
+   caller must store as the new tail. */
+static RRlist_T
+RRlist_push (RRlist_T *head, RRlist_T tail, int nextchar_start, int nextchar, int offset_start_1, int offset_start_2,
+             int offset_end_1, int offset_end_2, Filestring_T filestring1, Filestring_T filestring2, bool donep) {
+  RRlist_T node = (RRlist_T) MALLOC_OUT(sizeof(*node));
+
+  node->next = (RRlist_T) NULL;
+
+  node->nextchar_start = nextchar_start;
+  node->nextchar = nextchar;
+
+  node->offset_start_1 = offset_start_1;
+  node->offset_end_1 = offset_end_1;
+  node->offset_start_2 = offset_start_2;
+  node->offset_end_2 = offset_end_2;
+
+  node->filestring1 = filestring1;
+  node->filestring2 = filestring2;
+  node->donep = donep;
+
+  /* Emptiness is judged from *head rather than from tail, because
+     RRlist_pop does not reset tail when it removes the last node */
+  if (*head != NULL) {
+    tail->next = node;
+  } else {
+    *head = node;
+  }
+
+  return node;
+}
+
+/* Removes the head node of a non-empty queue, copying its contents into
+   the output parameters and releasing the node.  Returns the new head
+   (NULL when the queue becomes empty).  The caller's tail pointer is
+   not touched. */
+static RRlist_T
+RRlist_pop (RRlist_T head, int *nextchar_start, int *nextchar, int *offset_start_1, int *offset_start_2,
+            int *offset_end_1, int *offset_end_2, Filestring_T *filestring1, Filestring_T *filestring2, bool *donep) {
+  RRlist_T remainder = head->next;
+
+  *donep = head->donep;
+  *filestring2 = head->filestring2;
+  *filestring1 = head->filestring1;
+
+  *offset_end_2 = head->offset_end_2;
+  *offset_end_1 = head->offset_end_1;
+  *offset_start_2 = head->offset_start_2;
+  *offset_start_1 = head->offset_start_1;
+
+  *nextchar = head->nextchar;
+  *nextchar_start = head->nextchar_start;
+
+  FREE_OUT(head);
+  return remainder;
+}
+
+
+
+/* State shared between the Master_parser thread and the interface
+   routines (Master_self_interface and Master_mpi_interface).  The
+   queue and nwanted are accessed under lock by those routines. */
+#define T Master_T
+struct T {
+#ifdef HAVE_PTHREAD
+ pthread_mutex_t lock; /* Held around accesses to nwanted and the queue */
+ pthread_cond_t input_wanted_p; /* Signalled by an interface after incrementing nwanted */
+ pthread_cond_t input_avail_p; /* Signalled by Master_parser after pushing a block */
+#endif
+
+ /* Predicate for input_wanted_p */
+ int nwanted; /* Number of blocks requested but not yet produced */
+
+ /* Predicate for input_avail_p */
+ RRlist_T head; /* Oldest queued block, or NULL when the queue is empty */
+ RRlist_T tail; /* Most recently pushed block; not reset by RRlist_pop */
+
+ int ntotal; /* Running count accumulated by Master_parser; returned by Master_ntotal */
+
+ int n_slave_ranks; /* Initial count of ranks that Master_mpi_interface must tell "done" */
+ int nextchar; /* Initial nextchar value supplied to Master_new */
+ int nchars1; /* Stored by Master_new; referenced only in disabled (#if 0) code in Master_parser */
+ int nchars2; /* Stored by Master_new; referenced only in disabled (#if 0) code in Master_parser */
+
+ /* Input sources; their addresses are passed to Shortread_read, which may update them */
+ FILE *input_parser;
+ FILE *input2_parser;
+#ifdef HAVE_ZLIB
+ gzFile gzipped;
+ gzFile gzipped2;
+#endif
+#ifdef HAVE_BZLIB
+ Bzip2_T bzipped;
+ Bzip2_T bzipped2;
+#endif
+
+ char **files; /* Passed by address to Shortread_read together with nfiles */
+ int nfiles;
+ int nspaces; /* With part_interval, bounds the reads per block: nspaces * part_interval */
+ int part_modulus; /* Number of entries Master_parser skips before the first block */
+ int part_interval; /* Divisor used when accumulating ntotal */
+};
+
+
+/* Accessor for the running ntotal counter that Master_parser
+   accumulates.  The field is read without taking the lock. */
+int
+Master_ntotal (T this) {
+  int ntotal = this->ntotal;
+
+  return ntotal;
+}
+
+
+/* Modeled after fill_buffer in inbuffer.c */
+/* Allocates a Master_T, records the input sources and partitioning
+   parameters given by the caller, starts with an empty request queue
+   and zeroed counters, and (under HAVE_PTHREAD) initializes the mutex
+   and both condition variables.  Release with Master_free. */
+T
+Master_new (int n_slave_ranks, int nextchar, int nchars1, int nchars2,
+            FILE *input_parser, FILE *input2_parser,
+#ifdef HAVE_ZLIB
+            gzFile gzipped, gzFile gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+            Bzip2_T bzipped, Bzip2_T bzipped2,
+#endif
+            char **files, int nfiles, int nspaces, int part_modulus, int part_interval) {
+  T master;
+
+  master = (T) MALLOC(sizeof(*master));
+
+  /* Input sources */
+  master->input_parser = input_parser;
+  master->input2_parser = input2_parser;
+#ifdef HAVE_ZLIB
+  master->gzipped = gzipped;
+  master->gzipped2 = gzipped2;
+#endif
+#ifdef HAVE_BZLIB
+  master->bzipped = bzipped;
+  master->bzipped2 = bzipped2;
+#endif
+  master->files = files;
+  master->nfiles = nfiles;
+
+  /* Parsing position and partitioning parameters */
+  master->n_slave_ranks = n_slave_ranks;
+  master->nextchar = nextchar;
+  master->nchars1 = nchars1;
+  master->nchars2 = nchars2;
+  master->nspaces = nspaces;
+  master->part_modulus = part_modulus;
+  master->part_interval = part_interval;
+
+  /* Nothing requested, nothing queued, nothing counted yet */
+  master->nwanted = 0;
+  master->head = (RRlist_T) NULL;
+  master->tail = (RRlist_T) NULL;
+  master->ntotal = 0;
+
+#ifdef HAVE_PTHREAD
+  pthread_mutex_init(&master->lock,NULL);
+  pthread_cond_init(&master->input_wanted_p,NULL);
+  pthread_cond_init(&master->input_avail_p,NULL);
+#endif
+
+  return master;
+}
+
+/* Destroys the synchronization objects created by Master_new and
+   releases the Master_T itself.  Nodes still on the request queue are
+   not released here. */
+void
+Master_free (T *old) {
+#ifdef HAVE_PTHREAD
+  /* The mutex and condition variables exist only under HAVE_PTHREAD,
+     matching the struct definition and Master_new */
+  pthread_cond_destroy(&(*old)->input_wanted_p);
+  pthread_cond_destroy(&(*old)->input_avail_p);
+  pthread_mutex_destroy(&(*old)->lock);
+#endif
+
+  FREE(*old);
+  return;
+}
+
+
+
+/* Run only by rank 0, if output is designed to go to stdout */
+/* Thread routine: loops forever, receiving strings tagged
+   MPI_TAG_WRITE_STDOUT from any rank and writing each one to stdout.
+   The data argument is not used.  The loop never terminates, so the
+   final return is not reached. */
+void *
+Master_write_stdout (void *data) {
+  int strlength;
+  char *string;
+
+  (void) data;			/* Unused; present for the thread-routine signature */
+
+  while (true) {
+    /* This may not work if worker is from rank 0 */
+    string = Filestring_recv(&strlength,/*source*/MPI_ANY_SOURCE,/*tag*/MPI_TAG_WRITE_STDOUT,MPI_COMM_WORLD);
+
+    /* Write the message once, to stdout only.  Previously every message
+       was also echoed to stderr, which duplicated all output. */
+    fwrite(string,sizeof(char),strlength,stdout);
+
+    /* NOTE(review): string is not released here.  Confirm whether
+       Filestring_recv transfers ownership of the buffer; if it does,
+       this loop leaks one buffer per message. */
+  }
+
+  return (void *) NULL;
+}
+
+
+/* Run only by rank 0 */
+/* Communicates below with Master_mpi_interface for ranks 1..n and with Master_self_interface for rank 0 */
+/* Thread routine (data is the Master_T).  First skips part_modulus
+   entries, then loops forever: waits until an interface has requested
+   input (nwanted != 0), reads up to nspaces * part_interval entries
+   with Shortread_read, and pushes a description of that block onto the
+   queue for an interface to pop.  The loop has no exit, so the final
+   return is not reached. */
+void *
+Master_parser (void *data) {
+ T this = (T) data;
+
+ int nextchar = this->nextchar;
+
+ int nspaces = this->nspaces;
+ int part_modulus = this->part_modulus;
+ int part_interval = this->part_interval;
+
+ bool donep = false;
+ int nskip;
+ Shortread_T queryseq1, queryseq2;
+ Filestring_T filestring1, filestring2;
+ int nextchar_start;
+ int offset_start_1, offset_end_1, offset_start_2, offset_end_2;
+
+#if 0
+ /* For some reason, this doesn't work */
+ offset_start_1 = nchars1;
+ offset_start_2 = nchars2;
+#else
+ offset_start_1 = 0;
+ offset_start_2 = 0;
+#endif
+
+ /* Temporary filestrings used only while skipping; freed right after */
+ filestring1 = Filestring_new(/*id*/0);
+ filestring2 = Filestring_new(/*id*/0);
+
+ /* Skip to part_modulus */
+ if (part_modulus > 0) {
+ nskip = 0;
+ while (nskip < part_modulus &&
+ (queryseq1 = Shortread_read(&nextchar,&offset_start_1,&offset_start_2,&queryseq2,
+#ifdef USE_MPI
+ filestring1,filestring2,
+#endif
+ &this->input_parser,&this->input2_parser,
+#ifdef HAVE_ZLIB
+ &this->gzipped,&this->gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ &this->bzipped,&this->bzipped2,
+#endif
+ &this->files,&this->nfiles,/*skipp*/true)) != NULL) {
+ nskip++;
+ }
+ /* A NULL result means Shortread_read stopped before part_modulus entries were skipped */
+ if (queryseq1 == NULL) {
+ donep = true;
+ }
+ }
+
+ Filestring_free(&filestring2);
+ Filestring_free(&filestring1);
+
+
+
+ /* The first block starts where the skipping stopped */
+ nextchar_start = nextchar;
+ offset_end_1 = offset_start_1;
+ offset_end_2 = offset_start_2;
+
+ while (true) {
+ /* The lock is held from here until the block has been pushed,
+ including the calls to Shortread_read; it is released only inside
+ pthread_cond_wait and at the end of the iteration */
+ pthread_mutex_lock(&this->lock);
+ debug(fprintf(stdout,"nwanted is %d\n",this->nwanted));
+ while (this->nwanted == 0) {
+ pthread_cond_wait(&this->input_wanted_p,&this->lock);
+ }
+
+ /* Fresh filestrings for this block; they are stored in the queued node */
+ filestring1 = Filestring_new(/*id*/0);
+ filestring2 = Filestring_new(/*id*/0);
+
+ nskip = 0;
+ while (nskip < nspaces * part_interval &&
+ (queryseq1 = Shortread_read(&nextchar,&offset_end_1,&offset_end_2,&queryseq2,
+#ifdef USE_MPI
+ filestring1,filestring2,
+#endif
+ &this->input_parser,&this->input2_parser,
+#ifdef HAVE_ZLIB
+ &this->gzipped,&this->gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ &this->bzipped,&this->bzipped2,
+#endif
+ &this->files,&this->nfiles,/*skipp*/true)) != NULL) {
+ nskip++;
+ }
+ /* Adds nskip / part_interval, rounded up */
+ this->ntotal += (nskip + part_interval - 1)/part_interval;
+
+ /* NOTE(review): if nspaces * part_interval <= 0, Shortread_read is
+ never called in the loop above, so queryseq1 keeps its previous value
+ (uninitialized on the first pass when part_modulus is 0) -- confirm
+ that callers always pass positive values */
+ if (queryseq1 == NULL) {
+ donep = true;
+ }
+
+ this->tail = RRlist_push(&this->head,this->tail,nextchar_start,nextchar,offset_start_1,offset_start_2,
+ offset_end_1,offset_end_2,filestring1,filestring2,donep);
+ debug(RRlist_dump(this->head,this->tail));
+
+ /* The next block begins where this one ended */
+ nextchar_start = nextchar;
+ offset_start_1 = offset_end_1;
+ offset_start_2 = offset_end_2;
+
+ /* One request has been satisfied; wake an interface waiting for input */
+ this->nwanted -= 1;
+ pthread_cond_signal(&this->input_avail_p);
+ pthread_mutex_unlock(&this->lock);
+ }
+
+ return (void *) NULL;
+}
+
+
+/* Requests one block of input from Master_parser and blocks until one
+   is available.  The popped block's values are stored through the
+   output parameters. */
+void
+Master_self_interface (T this, int *nextchar_start, int *nextchar,
+                       int *offset_start_1, int *offset_start_2,
+                       int *offset_end_1, int *offset_end_2,
+                       Filestring_T *filestring1, Filestring_T *filestring2,
+                       bool *donep) {
+
+  /* Step 1: register the request and wake Master_parser */
+  debug(printf("Master_self_interface called and locking master\n"));
+  pthread_mutex_lock(&this->lock);
+  this->nwanted += 1;
+  pthread_cond_signal(&this->input_wanted_p);
+  pthread_mutex_unlock(&this->lock);
+
+  /* Step 2: wait until the queue holds a block, then take it */
+  pthread_mutex_lock(&this->lock);
+  while (this->head == NULL) {
+    pthread_cond_wait(&this->input_avail_p,&this->lock);
+  }
+  this->head = RRlist_pop(this->head,nextchar_start,nextchar,
+                          offset_start_1,offset_start_2,offset_end_1,offset_end_2,
+                          filestring1,filestring2,donep);
+  debug(RRlist_dump(this->head,this->tail));
+
+  debug(printf("Master_self_interface unlocking master\n"));
+  pthread_mutex_unlock(&this->lock);
+
+  debug(fprintf(stdout,"Master_self_interface now returning\n"));
+  return;
+}
+
+
+
+#ifdef USE_MPI
+/* Run only by rank 0 */
+/* Communicates below with fill_buffer procedure of ranks 1..n, and above with Master_parser */
+/* Replaces Inbuffer_new for MPI master */
+/* Thread routine (data is the Master_T).  For every MPI_TAG_WANT_INPUT
+   message received from any rank, requests one block from
+   Master_parser, waits for it, and replies to the requesting rank with
+   nextchar_start, nextchar and donep, followed by either the four
+   offsets or the two filestrings.  Each reply that carries donep ==
+   true decrements the count of outstanding ranks; the routine returns
+   NULL when that count reaches zero. */
+void *
+Master_mpi_interface (void *data) {
+  T this = (T) data;
+
+  int n_slave_ranks = this->n_slave_ranks;
+  int nfiles_slave;		/* Receive buffer for the request message; value not used */
+  MPI_Status status;
+  int ranki;
+  int nextchar_start, nextchar = this->nextchar;
+  int offset_start_1, offset_start_2, offset_end_1, offset_end_2;
+  Filestring_T filestring1, filestring2;
+  bool donep;
+  bool send_offsets_p = true;
+
+  /* Offsets are sent unless the first input is a gzip or bzip2 stream,
+     in which case the filestrings are sent instead.  The decision is
+     taken once, from the handles as they are when this routine starts.
+     With neither library compiled in, offsets are always sent. */
+#ifdef HAVE_ZLIB
+  if (this->gzipped != NULL) {
+    send_offsets_p = false;
+  }
+#endif
+#ifdef HAVE_BZLIB
+  if (this->bzipped != NULL) {
+    send_offsets_p = false;
+  }
+#endif
+
+
+  while (n_slave_ranks > 0) {
+    /* Need to send nextchar_end (nextchar at end of block)
+       because of the difference between filecontents end ('\0') and
+       FILE * end (EOF) */
+
+    MPI_RECV(&nfiles_slave,1,MPI_INT,/*source*/MPI_ANY_SOURCE,/*tag*/MPI_TAG_WANT_INPUT,MPI_COMM_WORLD,&status);
+    ranki = status.MPI_SOURCE;
+
+    /* Get information from Master_parser */
+    debug(printf("Master_mpi_interface locking master\n"));
+    pthread_mutex_lock(&this->lock);
+    this->nwanted += 1;
+    pthread_cond_signal(&this->input_wanted_p);
+    pthread_mutex_unlock(&this->lock);
+
+    pthread_mutex_lock(&this->lock);
+    while (this->head == NULL) {
+      pthread_cond_wait(&this->input_avail_p,&this->lock);
+    }
+    this->head = RRlist_pop(this->head,&nextchar_start,&nextchar,&offset_start_1,&offset_start_2,&offset_end_1,&offset_end_2,
+                            &filestring1,&filestring2,&donep);
+    debug(RRlist_dump(this->head,this->tail));
+    debug(printf("Master_mpi_interface unlocking master\n"));
+    pthread_mutex_unlock(&this->lock);
+
+    debug(fprintf(stdout,"Master: received message from %d.  Sending nextchars %c..%c, donep %d",
+                  ranki,nextchar_start,nextchar,donep));
+    MPI_SEND(&nextchar_start,1,MPI_INT,/*dest*/ranki,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+    MPI_SEND(&nextchar,1,MPI_INT,/*dest*/ranki,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+    /* NOTE(review): sending donep as MPI_UNSIGNED_CHAR assumes bool is a
+       one-byte type -- confirm against bool.h */
+    MPI_SEND(&donep,1,MPI_UNSIGNED_CHAR,/*dest*/ranki,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+
+    if (send_offsets_p == true) {
+      debug(fprintf(stdout," Sending offsets %d..%d and %d..%d.\n",offset_start_1,offset_end_1,offset_start_2,offset_end_2));
+      MPI_SEND(&offset_start_1,1,MPI_INT,/*dest*/ranki,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+      MPI_SEND(&offset_start_2,1,MPI_INT,/*dest*/ranki,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+      MPI_SEND(&offset_end_1,1,MPI_INT,/*dest*/ranki,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+      MPI_SEND(&offset_end_2,1,MPI_INT,/*dest*/ranki,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+    } else {
+      debug(fprintf(stdout," Sending filestrings\n"));
+      Filestring_send(filestring1,/*dest*/ranki,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+      Filestring_send(filestring2,/*dest*/ranki,/*tag*/MPI_TAG_GIVE_INPUT,MPI_COMM_WORLD);
+    }
+
+    /* The block's filestrings are released whether or not they were sent */
+    Filestring_free(&filestring2);
+    Filestring_free(&filestring1);
+
+    debug(fprintf(stdout,"\n"));
+
+    if (donep == true) {
+      n_slave_ranks -= 1;
+      debug(fprintf(stdout,"n_slave_ranks is now %d.  Rank %d knows we are done.\n",n_slave_ranks,ranki));
+    }
+  }
+
+  debug(fprintf(stdout,"Master_mpi_interface now returning\n"));
+  return (void *) NULL;
+}
+
+#endif
+
diff --git a/src/master.h b/src/master.h
new file mode 100644
index 0000000..bdee50b
--- /dev/null
+++ b/src/master.h
@@ -0,0 +1,65 @@
+/* $Id: master.h 162088 2015-03-26 18:29:04Z twu $ */
+#ifndef MASTER_INCLUDED
+#define MASTER_INCLUDED
+#ifdef HAVE_CONFIG_H
+#include <config.h> /* For HAVE_ZLIB, HAVE_BZLIB, USE_MPI */
+#endif
+
+#ifdef USE_MPI
+#include <mpi.h>
+#include "mpidebug.h"
+#endif
+
+#include <stdio.h>
+#include "bool.h"
+#include "filestring.h"
+
+#ifdef HAVE_ZLIB
+#include <zlib.h>
+#endif
+
+#ifdef HAVE_BZLIB
+#include "bzip2.h"
+#endif
+
+
+/* Opaque handle; the structure is defined in master.c */
+#define T Master_T
+typedef struct T *T;
+
+
+/* Returns the running ntotal counter accumulated by Master_parser */
+extern int
+Master_ntotal (T this);
+
+/* Allocates and initializes a Master_T from the given input sources and
+   partitioning parameters.  Release with Master_free. */
+extern T
+Master_new (int n_slave_ranks, int nextchar, int nchars1, int nchars2,
+ FILE *input_parser, FILE *input2_parser,
+#ifdef HAVE_ZLIB
+ gzFile gzipped, gzFile gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ Bzip2_T bzipped, Bzip2_T bzipped2,
+#endif
+ char **files, int nfiles, int nspaces, int part_modulus, int part_interval);
+
+/* Destroys the mutex and condition variables and frees *old */
+extern void
+Master_free (T *old);
+
+/* Thread routine: receives strings tagged MPI_TAG_WRITE_STDOUT from any
+   rank and writes them out.  Loops forever. */
+extern void *
+Master_write_stdout (void *data);
+
+/* Thread routine (data is a Master_T): reads blocks of input on request
+   and queues them for the interface routines.  Loops forever. */
+extern void *
+Master_parser (void *data);
+
+/* Requests one block from Master_parser, waits for it, and returns its
+   values through the output parameters */
+extern void
+Master_self_interface (T this, int *nextchar_start, int *nextchar,
+ int *offset_start_1, int *offset_start_2,
+ int *offset_end_1, int *offset_end_2,
+ Filestring_T *filestring1, Filestring_T *filestring2,
+ bool *donep);
+
+/* Thread routine (data is a Master_T): serves input requests from other
+   MPI ranks.  Defined in master.c only when USE_MPI is defined. */
+extern void *
+Master_mpi_interface (void *data);
+
+#undef T
+#endif
+
diff --git a/src/match.h b/src/match.h
index c0fd143..57fd83a 100644
--- a/src/match.h
+++ b/src/match.h
@@ -1,6 +1,7 @@
-/* $Id: match.h 99737 2013-06-27 19:33:03Z twu $ */
+/* $Id: match.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef MATCH_INCLUDED
#define MATCH_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "types.h"
diff --git a/src/matchdef.h b/src/matchdef.h
index 8572ae0..5e97679 100644
--- a/src/matchdef.h
+++ b/src/matchdef.h
@@ -1,6 +1,7 @@
-/* $Id: matchdef.h 99737 2013-06-27 19:33:03Z twu $ */
+/* $Id: matchdef.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef MATCHDEF_INCLUDED
#define MATCHDEF_INCLUDED
+
#include "bool.h"
#include "chrnum.h"
#include "genomicpos.h"
diff --git a/src/matchpool.h b/src/matchpool.h
index 58ecc4b..2ba1857 100644
--- a/src/matchpool.h
+++ b/src/matchpool.h
@@ -1,6 +1,7 @@
-/* $Id: matchpool.h 99737 2013-06-27 19:33:03Z twu $ */
+/* $Id: matchpool.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef MATCHPOOL_INCLUDED
#define MATCHPOOL_INCLUDED
+
#include "bool.h"
#include "iit-read-univ.h"
#include "genomicpos.h"
diff --git a/src/maxent_hr.h b/src/maxent_hr.h
index 75659db..32e710f 100644
--- a/src/maxent_hr.h
+++ b/src/maxent_hr.h
@@ -1,5 +1,7 @@
+/* $Id: maxent_hr.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef MAXENT_HR_INCLUDED
#define MAXENT_HR_INCLUDED
+
#include "genomicpos.h"
#include "types.h"
diff --git a/src/md5.c b/src/md5.c
index efab896..39dbd90 100644
--- a/src/md5.c
+++ b/src/md5.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: md5.c 40271 2011-05-28 02:29:18Z twu $";
+static char rcsid[] = "$Id: md5.c 155282 2014-12-12 19:42:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -275,11 +275,11 @@ MD5_compute (unsigned char *input, int input_len) {
void
-MD5_print (FILE *fp, unsigned char *digest) {
+MD5_print (Filestring_T fp, unsigned char *digest) {
int i;
for (i = 0; i < 16; i++) {
- fprintf(fp,"%02x", digest[i]);
+ FPRINTF(fp,"%02x", digest[i]);
}
return;
}
diff --git a/src/md5.h b/src/md5.h
index 99142cc..aabc343 100644
--- a/src/md5.h
+++ b/src/md5.h
@@ -1,13 +1,14 @@
-/* $Id: md5.h 40271 2011-05-28 02:29:18Z twu $ */
+/* $Id: md5.h 155282 2014-12-12 19:42:54Z twu $ */
#ifndef MD5_INCLUDED
#define MD5_INCLUDED
#include <stdio.h>
+#include "filestring.h"
extern unsigned char *
MD5_compute (unsigned char *input, int input_len);
extern void
-MD5_print (FILE *fp, unsigned char *digest);
+MD5_print (Filestring_T fp, unsigned char *digest);
#endif
diff --git a/src/mem.c b/src/mem.c
index 1b3d377..e324ed6 100644
--- a/src/mem.c
+++ b/src/mem.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: mem.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: mem.c 155282 2014-12-12 19:42:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -69,7 +69,7 @@ struct sizelist {
struct sizelist *rest;
};
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
static pthread_mutex_t memusage_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_key_t key_memusage_std_stack; /* Standard pool: Memory that is used by a thread within a query */
static pthread_key_t key_memusage_std_sizelist;
@@ -85,6 +85,7 @@ static struct sizelist *memusage_std_sizelist = NULL;
static long int memusage_std_stack_max = 0;
static long int memusage_std_heap = 0;
static long int memusage_std_heap_max = 0;
+static long int memusage_keep = 0;
#endif
static long int memusage_in = 0; /* Input pool: Memory from inbuffer to threads */
@@ -92,7 +93,7 @@ static long int memusage_out = 0; /* Output pool: Memory from threads to outbuff
void
Mem_usage_init () {
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_key_create(&key_memusage_std_stack,NULL);
pthread_key_create(&key_memusage_std_sizelist,NULL);
pthread_key_create(&key_memusage_std_stack_max,NULL);
@@ -100,6 +101,7 @@ Mem_usage_init () {
pthread_key_create(&key_memusage_std_heap_max,NULL);
pthread_key_create(&key_memusage_keep,NULL);
pthread_key_create(&key_threadname,NULL);
+
pthread_setspecific(key_memusage_std_stack,(void *) 0);
pthread_setspecific(key_memusage_std_stack_max,(void *) 0);
pthread_setspecific(key_memusage_std_heap,(void *) 0);
@@ -110,6 +112,7 @@ Mem_usage_init () {
memusage_std_stack_max = 0;
memusage_std_heap = 0;
memusage_std_heap_max = 0;
+ memusage_keep = 0;
#endif
memusage_in = 0;
@@ -119,20 +122,25 @@ Mem_usage_init () {
void
-Mem_usage_set_threadname (const char *threadname) {
-#ifdef HAVE_PTHREAD
- pthread_setspecific(key_threadname,(void *) threadname);
+Mem_usage_set_threadname (char *threadname_in) {
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
+ pthread_setspecific(key_threadname,(void *) threadname_in);
+#else
+ threadname = threadname_in;
#endif
return;
}
void
Mem_usage_reset_heap_baseline (long int x) {
-#ifdef HAVE_PTHREAD
- char *threadname;
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
long int memusage_std_heap;
+#ifdef DEBUG_HEAP
+ char *threadname;
threadname = (char *) pthread_getspecific(key_threadname);
+#endif
+
memusage_std_heap = (long int) pthread_getspecific(key_memusage_std_heap);
debug_heap(printf("%ld %s: Reset memusage_std_heap to %ld\n",memusage_std_heap,threadname,x));
pthread_setspecific(key_memusage_std_heap,(void *) x);
@@ -144,11 +152,9 @@ Mem_usage_reset_heap_baseline (long int x) {
void
Mem_usage_reset_stack_max () {
-#ifdef HAVE_PTHREAD
- char *threadname;
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
long int memusage_std_stack_max;
- threadname = (char *) pthread_getspecific(key_threadname);
memusage_std_stack_max = (long int) pthread_getspecific(key_memusage_std_stack_max);
pthread_setspecific(key_memusage_std_stack_max,(void *) 0);
#else
@@ -158,11 +164,9 @@ Mem_usage_reset_stack_max () {
void
Mem_usage_reset_heap_max () {
-#ifdef HAVE_PTHREAD
- char *threadname;
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
long int memusage_std_heap_max;
- threadname = (char *) pthread_getspecific(key_threadname);
memusage_std_heap_max = (long int) pthread_getspecific(key_memusage_std_heap_max);
pthread_setspecific(key_memusage_std_heap_max,(void *) 0);
#else
@@ -175,17 +179,20 @@ void
Mem_usage_std_stack_add (long int x, const char *file, int line) {
struct sizelist *new;
-#ifdef HAVE_PTHREAD
- char *threadname;
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
long int memusage_std_stack, memusage_std_stack_max;
+#ifdef DEBUG_STACK
+ char *threadname;
threadname = (char *) pthread_getspecific(key_threadname);
+#endif
+
memusage_std_stack = (long int) pthread_getspecific(key_memusage_std_stack);
memusage_std_stack += x;
debug_stack(printf("%ld %s: ",memusage_std_stack,threadname));
pthread_setspecific(key_memusage_std_stack,(void *) memusage_std_stack);
- memusage_std_stack_max = pthread_getspecific(key_memusage_std_stack_max);
+ memusage_std_stack_max = (long int) pthread_getspecific(key_memusage_std_stack_max);
if (memusage_std_stack > memusage_std_stack_max) {
pthread_setspecific(key_memusage_std_stack_max,(void *) memusage_std_stack);
}
@@ -202,7 +209,7 @@ Mem_usage_std_stack_add (long int x, const char *file, int line) {
debug_stack(printf("%ld: ",memusage_std_stack));
if (memusage_std_stack > memusage_std_stack_max) {
- memusage_std_stack_max = memusage_std_stack);
+ memusage_std_stack_max = memusage_std_stack;
}
new = (struct sizelist *) malloc(sizeof(struct sizelist));
@@ -221,18 +228,21 @@ Mem_usage_std_stack_subtract (const char *file, int line) {
long int x;
struct sizelist *head;
-#ifdef HAVE_PTHREAD
- char *threadname;
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
long int memusage_std_stack;
struct sizelist *memusage_std_sizelist;
+#ifdef DEBUG_STACK
+ char *threadname;
+ threadname = (char *) pthread_getspecific(key_threadname);
+#endif
+
memusage_std_sizelist = (struct sizelist *) pthread_getspecific(key_memusage_std_sizelist);
x = memusage_std_sizelist->size;
head = memusage_std_sizelist->rest;
free(memusage_std_sizelist);
pthread_setspecific(key_memusage_std_sizelist,(void *) head);
- threadname = (char *) pthread_getspecific(key_threadname);
memusage_std_stack = (long int) pthread_getspecific(key_memusage_std_stack);
memusage_std_stack -= x;
debug_stack(printf("%ld %s: ",memusage_std_stack,threadname));
@@ -258,11 +268,14 @@ Mem_usage_std_stack_subtract (const char *file, int line) {
void
Mem_usage_std_heap_add (long int x) {
-#ifdef HAVE_PTHREAD
- char *threadname;
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
long int memusage_std_heap;
+#ifdef DEBUG_HEAP
+ char *threadname;
threadname = (char *) pthread_getspecific(key_threadname);
+#endif
+
memusage_std_heap = (long int) pthread_getspecific(key_memusage_std_heap);
memusage_std_heap += x;
debug_heap(printf("%ld %s: ",memusage_std_heap,threadname));
@@ -277,7 +290,7 @@ Mem_usage_std_heap_add (long int x) {
long int
Mem_usage_report_std_stack () {
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
return (long int) pthread_getspecific(key_memusage_std_stack);
#else
return memusage_std_stack;
@@ -286,7 +299,7 @@ Mem_usage_report_std_stack () {
long int
Mem_usage_report_std_heap () {
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
return (long int) pthread_getspecific(key_memusage_std_heap);
#else
return memusage_std_heap;
@@ -295,7 +308,7 @@ Mem_usage_report_std_heap () {
long int
Mem_usage_report_keep () {
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
return (long int) pthread_getspecific(key_memusage_keep);
#else
return memusage_keep;
@@ -304,7 +317,7 @@ Mem_usage_report_keep () {
long int
Mem_usage_report_std_stack_max () {
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
return (long int) pthread_getspecific(key_memusage_std_stack_max);
#else
return memusage_std_stack_max;
@@ -313,7 +326,7 @@ Mem_usage_report_std_stack_max () {
long int
Mem_usage_report_std_heap_max () {
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
return (long int) pthread_getspecific(key_memusage_std_heap_max);
#else
return memusage_std_heap_max;
@@ -371,7 +384,7 @@ Mem_alloc (size_t nbytes, const char *file, int line) {
static struct descriptor *bp;
unsigned h;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
long int memusage_std_heap, memusage_std_heap_max;
char *threadname;
@@ -382,7 +395,7 @@ Mem_alloc (size_t nbytes, const char *file, int line) {
ptr = malloc(nbytes);
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
threadname = (char *) pthread_getspecific(key_threadname);
memusage_std_heap = (long int) pthread_getspecific(key_memusage_std_heap);
memusage_std_heap += nbytes;
@@ -437,7 +450,7 @@ Mem_alloc (size_t nbytes, const char *file, int line) {
}
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -454,7 +467,7 @@ Mem_alloc_keep (size_t nbytes, const char *file, int line) {
static struct descriptor *bp;
unsigned h;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
long int memusage_keep;
char *threadname;
@@ -465,7 +478,7 @@ Mem_alloc_keep (size_t nbytes, const char *file, int line) {
ptr = malloc(nbytes);
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
threadname = (char *) pthread_getspecific(key_threadname);
memusage_keep = (long int) pthread_getspecific(key_memusage_keep);
memusage_keep += nbytes;
@@ -512,7 +525,7 @@ Mem_alloc_keep (size_t nbytes, const char *file, int line) {
}
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -527,7 +540,7 @@ Mem_alloc_in (size_t nbytes, const char *file, int line) {
static struct descriptor *bp;
unsigned h;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
#endif
#endif
@@ -576,7 +589,7 @@ Mem_alloc_in (size_t nbytes, const char *file, int line) {
}
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -591,7 +604,7 @@ Mem_alloc_out (size_t nbytes, const char *file, int line) {
static struct descriptor *bp;
unsigned h;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
#endif
#endif
@@ -640,7 +653,7 @@ Mem_alloc_out (size_t nbytes, const char *file, int line) {
}
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -663,7 +676,7 @@ Mem_calloc (size_t count, size_t nbytes, const char *file, int line) {
static struct descriptor *bp;
unsigned h;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
long int memusage_std_heap, memusage_std_heap_max;
char *threadname;
@@ -696,7 +709,7 @@ Mem_calloc (size_t count, size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
threadname = (char *) pthread_getspecific(key_threadname);
memusage_std_heap = (long int) pthread_getspecific(key_memusage_std_heap);
memusage_std_heap += count*nbytes;
@@ -738,7 +751,7 @@ Mem_calloc (size_t count, size_t nbytes, const char *file, int line) {
}
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -754,7 +767,7 @@ Mem_calloc_keep (size_t count, size_t nbytes, const char *file, int line) {
static struct descriptor *bp;
unsigned h;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
long int memusage_keep;
char *threadname;
@@ -787,7 +800,7 @@ Mem_calloc_keep (size_t count, size_t nbytes, const char *file, int line) {
#endif
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
threadname = (char *) pthread_getspecific(key_threadname);
memusage_keep = (long int) pthread_getspecific(key_memusage_keep);
memusage_keep += count*nbytes;
@@ -821,7 +834,7 @@ Mem_calloc_keep (size_t count, size_t nbytes, const char *file, int line) {
}
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -837,7 +850,7 @@ Mem_calloc_in (size_t count, size_t nbytes, const char *file, int line) {
static struct descriptor *bp;
unsigned h;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
#endif
#endif
@@ -895,7 +908,7 @@ Mem_calloc_in (size_t count, size_t nbytes, const char *file, int line) {
}
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -910,7 +923,7 @@ Mem_calloc_out (size_t count, size_t nbytes, const char *file, int line) {
static struct descriptor *bp;
unsigned h;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
#endif
#endif
@@ -968,7 +981,7 @@ Mem_calloc_out (size_t count, size_t nbytes, const char *file, int line) {
}
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -983,7 +996,7 @@ Mem_calloc_no_exception (size_t count, size_t nbytes, const char *file, int line
static struct descriptor *bp;
unsigned h;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
long int memusage_std_heap;
char *threadname;
@@ -1003,7 +1016,7 @@ Mem_calloc_no_exception (size_t count, size_t nbytes, const char *file, int line
ptr = calloc(count, nbytes);
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
threadname = (char *) pthread_getspecific(key_threadname);
memusage_std_heap = (long int) pthread_getspecific(key_memusage_std_heap);
memusage_std_heap += count*nbytes;
@@ -1020,7 +1033,7 @@ Mem_calloc_no_exception (size_t count, size_t nbytes, const char *file, int line
#endif
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -1034,7 +1047,7 @@ Mem_free (void *ptr, const char *file, int line) {
struct descriptor *bp;
size_t nbytes;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
long int memusage_std_heap;
char *threadname;
@@ -1053,7 +1066,7 @@ Mem_free (void *ptr, const char *file, int line) {
Except_raise(&Mem_Failed, file, line);
} else {
nbytes = bp->size;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
threadname = (char *) pthread_getspecific(key_threadname);
memusage_std_heap = (long int) pthread_getspecific(key_memusage_std_heap);
memusage_std_heap -= nbytes;
@@ -1084,7 +1097,7 @@ Mem_free (void *ptr, const char *file, int line) {
#endif
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -1099,7 +1112,7 @@ Mem_free_keep (void *ptr, const char *file, int line) {
struct descriptor *bp;
size_t nbytes;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
long int memusage_keep;
char *threadname;
@@ -1118,7 +1131,7 @@ Mem_free_keep (void *ptr, const char *file, int line) {
Except_raise(&Mem_Failed, file, line);
} else {
nbytes = bp->size;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
threadname = (char *) pthread_getspecific(key_threadname);
memusage_keep = (long int) pthread_getspecific(key_memusage_keep);
memusage_keep -= nbytes;
@@ -1149,7 +1162,7 @@ Mem_free_keep (void *ptr, const char *file, int line) {
#endif
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -1164,7 +1177,7 @@ Mem_free_in (void *ptr, const char *file, int line) {
struct descriptor *bp;
size_t nbytes;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
#endif
#endif
@@ -1205,7 +1218,7 @@ Mem_free_in (void *ptr, const char *file, int line) {
#endif
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
@@ -1219,7 +1232,7 @@ Mem_free_out (void *ptr, const char *file, int line) {
struct descriptor *bp;
size_t nbytes;
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_lock(&memusage_mutex);
#endif
#endif
@@ -1260,7 +1273,7 @@ Mem_free_out (void *ptr, const char *file, int line) {
#endif
#ifdef MEMUSAGE
-#ifdef HAVE_PTHREAD
+#if !defined(USE_MPI) && defined(HAVE_PTHREAD)
pthread_mutex_unlock(&memusage_mutex);
#endif
#endif
diff --git a/src/mem.h b/src/mem.h
index 2149523..e607a5b 100644
--- a/src/mem.h
+++ b/src/mem.h
@@ -1,9 +1,8 @@
-/* $Id: mem.h 145990 2014-08-25 21:47:32Z twu $ */
+/* $Id: mem.h 157223 2015-01-22 18:43:01Z twu $ */
#ifndef MEM_INCLUDED
#define MEM_INCLUDED
-
#ifdef HAVE_CONFIG_H
-#include <config.h>
+#include <config.h> /* For HAVE_ALLOCA, HAVE_ALLOCA_H */
#endif
#include <stddef.h>
@@ -35,7 +34,7 @@ typedef enum {MAIN_STORAGE, INPUT_USAGE, OUTPUT_USAGE, WORKER_STORAGE} Memusage_
extern void
Mem_usage_init ();
extern void
-Mem_usage_set_threadname (const char *threadname);
+Mem_usage_set_threadname (char *threadname_in);
extern void
Mem_usage_reset_heap_baseline (long int x);
extern void
diff --git a/src/mpidebug.c b/src/mpidebug.c
new file mode 100644
index 0000000..8e02265
--- /dev/null
+++ b/src/mpidebug.c
@@ -0,0 +1,152 @@
+static char rcsid[] = "$Id: mpidebug.c 162090 2015-03-26 18:29:53Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "mpidebug.h"
+
+#include <stdio.h>
+#include "bool.h"
+#include "types.h"
+#include "genomicpos.h"
+
+/* Process id set by MPI_Debug_setup; used only to label the trace output below */
+static int myid;
+
+/* Records the process id that the MPI_Debug_* wrappers print in their trace lines */
+void
+MPI_Debug_setup (int myid_in) {
+ myid = myid_in;
+ return;
+}
+
+
+/* Opens filename read-only on comm and returns the file handle.  The return
+   code of MPI_File_open is not checked.  The "#if 0" sections are disabled
+   code that would have set a file view. */
+MPI_File
+MPI_fopen (char *filename, MPI_Comm comm) {
+ MPI_File mpi_input;
+#if 0
+ MPI_Datatype contig, filetype;
+#endif
+
+ MPI_File_open(comm,filename,MPI_MODE_RDONLY,MPI_INFO_NULL,&mpi_input);
+#if 0
+ MPI_Type_contiguous(endptr - startptr,MPI_BYTE,&contig);
+ MPI_Type_create_resized(contig,/*lowerbound*/0,/*extent*/filesize - startptr,&filetype);
+ MPI_Type_commit(&filetype);
+ MPI_File_set_view(mpi_input,/*disp*/startptr,MPI_BYTE,filetype,"native",MPI_INFO_NULL);
+#endif
+
+ return mpi_input;
+}
+
+
+
+/* Returns a printable name for the datatypes that print_value knows how to
+   show, or "OTHER" for anything else */
+static char *
+get_typename (MPI_Datatype datatype) {
+ if (datatype == MPI_INT) {
+ return "MPI_INT";
+ } else if (datatype == MPI_BOOL_T) {
+ return "MPI_BOOL_T";
+ } else if (datatype == MPI_CHAR) {
+ return "MPI_CHAR";
+ } else if (datatype == MPI_DOUBLE) {
+ return "MPI_DOUBLE";
+ } else if (datatype == MPI_FLOAT) {
+ return "MPI_FLOAT";
+ } else {
+ return "OTHER";
+ }
+}
+
+/* Prints the contents of buf for the datatypes named by get_typename.  For
+   MPI_CHAR, prints at most count characters, because a message buffer is not
+   required to be NUL-terminated.  For the numeric types, prints only the
+   first element, regardless of count.  Unknown datatypes print "??". */
+static void
+print_value (const void *buf, int count, MPI_Datatype datatype) {
+ if (datatype == MPI_INT) {
+ printf("%d",* (int *) buf);
+ } else if (datatype == MPI_BOOL_T) {
+ printf("%d",(int) (* (bool *) buf));
+ } else if (datatype == MPI_CHAR) {
+ printf("%.*s",count,(char *) buf);
+ } else if (datatype == MPI_DOUBLE) {
+ printf("%f",* (double *) buf);
+ } else if (datatype == MPI_FLOAT) {
+ printf("%f",* (float *) buf);
+ } else {
+ printf("??");
+ }
+}
+
+
+/* Prints a trace line (endpoints, call site, count, datatype, tag, value) and
+   then performs the MPI_Send, returning its result */
+int
+MPI_Debug_Send (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, const char *file, int line) {
+ printf("MPI_Send (%d->%d) at %s:%d: proc %d sending count %d of datatype %s and tag %d to dest %d: ",
+ myid,dest,file,line,myid,count,get_typename(datatype),tag,dest);
+ print_value(buf,count,datatype);
+ printf("\n");
+ return MPI_Send(buf,count,datatype,dest,tag,comm);
+}
+
+/* Performs the MPI_Recv first, then prints a trace line using the source
+   reported in *status and the received value; returns the MPI_Recv result.
+   NOTE(review): *status is dereferenced, so callers presumably must pass a
+   real MPI_Status rather than MPI_STATUS_IGNORE -- confirm against callers. */
+int
+MPI_Debug_Recv (void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status,
+ const char *file, int line) {
+ int result;
+ result = MPI_Recv(buf,count,datatype,source,tag,comm,&(*status));
+ printf("MPI_Recv (%d<-%d) at %s:%d: proc %d receiving count %d of datatype %s and tag %d from source %d: ",
+ myid,(*status).MPI_SOURCE,file,line,myid,count,get_typename(datatype),tag,(*status).MPI_SOURCE);
+ print_value(buf,count,datatype);
+ printf("\n");
+ return result;
+}
+
+/* Prints a trace line and then starts the MPI_Isend, returning its result.
+   The send is not waited on here (the MPI_Wait is commented out), so the
+   local status variable is currently unused. */
+int
+MPI_Debug_Isend (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm,
+ MPI_Request *req, const char *file, int line) {
+ MPI_Status status;
+ int result;
+
+ printf("MPI_Isend (%d->%d) at %s:%d: proc %d sending count %d of datatype %s to dest %d: ",
+ myid,dest,file,line,myid,count,get_typename(datatype),dest);
+
+ print_value(buf,count,datatype);
+ printf("\n");
+
+ result = MPI_Isend(buf,count,datatype,dest,tag,comm,&(*req));
+ /* MPI_Wait(&(*req),&status); */
+ return result;
+}
+
+/* Starts the MPI_Irecv and immediately waits on *req so that the source and
+   received value can be printed; with this wrapper the receive has therefore
+   completed by the time the call returns.  Returns the MPI_Irecv result. */
+int
+MPI_Debug_Irecv (void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm,
+ MPI_Request *req, const char *file, int line) {
+ MPI_Status status;
+ int result;
+
+ result = MPI_Irecv(buf,count,datatype,source,tag,comm,&(*req));
+ MPI_Wait(&(*req),&status);
+ printf("MPI_Irecv (%d<-%d) at %s:%d: proc %d receiving count %d of datatype %s from source %d: ",
+ myid,status.MPI_SOURCE,file,line,myid,count,get_typename(datatype),status.MPI_SOURCE);
+
+ print_value(buf,count,datatype);
+ printf("\n");
+ return result;
+}
diff --git a/src/mpidebug.h b/src/mpidebug.h
new file mode 100644
index 0000000..6245dd1
--- /dev/null
+++ b/src/mpidebug.h
@@ -0,0 +1,59 @@
+/* $Id: mpidebug.h 162091 2015-03-26 18:30:04Z twu $ */
+#ifndef MPIDEBUG_INCLUDED
+#define MPIDEBUG_INCLUDED
+#include <mpi.h>
+
+
+#define MPI_TAG_DEFAULT 0
+#define MPI_TAG_WANT_INPUT 1
+#define MPI_TAG_GIVE_INPUT 2
+#define MPI_TAG_WRITE_STDOUT 3
+
+
+
+/* Define MPIDEBUG to route MPI_SEND/MPI_RECV, and MPIDEBUG_I to route
+   MPI_ISEND/MPI_IRECV, through the tracing wrappers declared below */
+/* #define MPIDEBUG 1 */
+/* #define MPIDEBUG_I 1 */
+
+extern void
+MPI_Debug_setup (int myid_in);
+
+extern MPI_File
+MPI_fopen (char *filename, MPI_Comm comm);
+
+extern int
+MPI_Debug_Send (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, const char *file, int line);
+extern int
+MPI_Debug_Recv (void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status, const char *file, int line);
+/* Declared so that the MPI_ISEND/MPI_IRECV macros below compile when MPIDEBUG_I is defined */
+extern int
+MPI_Debug_Isend (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *req, const char *file, int line);
+extern int
+MPI_Debug_Irecv (void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *req, const char *file, int line);
+
+#ifdef MPIDEBUG
+#define MPI_SEND(buf,count,datatype,dest,tag,comm) MPI_Debug_Send(buf,count,datatype,dest,tag,comm,__FILE__,__LINE__)
+#define MPI_RECV(buf,count,datatype,source,tag,comm,status) MPI_Debug_Recv(buf,count,datatype,source,tag,comm,status,__FILE__,__LINE__)
+
+#else
+#define MPI_SEND(buf,count,datatype,dest,tag,comm) MPI_Send(buf,count,datatype,dest,tag,comm)
+#define MPI_RECV(buf,count,datatype,source,tag,comm,status) MPI_Recv(buf,count,datatype,source,tag,comm,status)
+#endif
+
+
+#ifdef MPIDEBUG_I
+#define MPI_ISEND(buf,count,datatype,dest,tag,comm,req) MPI_Debug_Isend(buf,count,datatype,dest,tag,comm,req,__FILE__,__LINE__)
+#define MPI_IRECV(buf,count,datatype,source,tag,comm,req) MPI_Debug_Irecv(buf,count,datatype,source,tag,comm,req,__FILE__,__LINE__)
+
+#else
+#define MPI_ISEND(buf,count,datatype,dest,tag,comm,req) MPI_Isend(buf,count,datatype,dest,tag,comm,req)
+#define MPI_IRECV(buf,count,datatype,source,tag,comm,req) MPI_Irecv(buf,count,datatype,source,tag,comm,req)
+#endif
+
+/* MPI_SSEND always maps directly to MPI_Ssend; it has no tracing variant */
+#define MPI_SSEND(buf,count,datatype,dest,tag,comm) MPI_Ssend(buf,count,datatype,dest,tag,comm)
+
+#endif
+
+
diff --git a/src/oligo.h b/src/oligo.h
index 045a338..2ea5776 100644
--- a/src/oligo.h
+++ b/src/oligo.h
@@ -1,6 +1,7 @@
-/* $Id: oligo.h 99737 2013-06-27 19:33:03Z twu $ */
+/* $Id: oligo.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef OLIGO_INCLUDED
#define OLIGO_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "indexdb.h"
diff --git a/src/oligoindex_hr.c b/src/oligoindex_hr.c
index d5e452f..f4ebc19 100644
--- a/src/oligoindex_hr.c
+++ b/src/oligoindex_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: oligoindex_hr.c 156817 2015-01-15 21:55:11Z twu $";
+static char rcsid[] = "$Id: oligoindex_hr.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -18,6 +18,11 @@ static char rcsid[] = "$Id: oligoindex_hr.c 156817 2015-01-15 21:55:11Z twu $";
#include "orderstat.h"
#include "cmet.h"
+#ifdef DEBUG14
+/* Need to change Makefile.am to include oligoindex_old.c and oligoindex_old.h */
+#include "oligoindex_old.h"
+#endif
+
#ifndef USE_DIAGPOOL
#include "diag.h"
#endif
@@ -25,6 +30,9 @@ static char rcsid[] = "$Id: oligoindex_hr.c 156817 2015-01-15 21:55:11Z twu $";
#ifdef HAVE_SSE2
#include <emmintrin.h>
#endif
+#ifdef HAVE_SSSE3
+#include <tmmintrin.h>
+#endif
#ifdef HAVE_SSE4_1
#include <smmintrin.h>
#endif
@@ -32,6 +40,8 @@ static char rcsid[] = "$Id: oligoindex_hr.c 156817 2015-01-15 21:55:11Z twu $";
#ifdef HAVE_SSE2
#define USE_SIMD_FOR_COUNTS 1
+#else
+#define INDIVIDUAL_SHIFTS 1
#endif
#define THETADIFF1 20.0
@@ -64,7 +74,8 @@ struct T {
int diag_lookback;
int suffnconsecutive;
- bool query_evaluated_p;
+ /* bool query_evaluated_p; */
+
Oligospace_T oligospace;
#ifdef HAVE_SSE2
__m128i *inquery_allocated;
@@ -76,11 +87,11 @@ struct T {
Count_T *counts;
#ifdef PMAP
int *relevant_counts;
- bool *overabundant;
#endif
Chrpos_T **positions;
+ Chrpos_T *positions_space;
Chrpos_T **pointers;
-
+ Chrpos_T **pointers_allocated;
};
struct Oligoindex_array_T {
@@ -136,6 +147,56 @@ struct Oligoindex_array_T {
#endif
+#if defined(DEBUG)
+#ifdef HAVE_SSE2
+/* For debugging of SIMD procedures: stores x to a local __m128i and prints its four 32-bit words, one per line, each prefixed by label */
+static void
+print_vector (__m128i x, char *label) {
+ __m128i a[1];
+ unsigned int *s = (unsigned int *) a; /* explicit cast: __m128i * does not convert implicitly */
+
+ _mm_store_si128(a,x);
+ _mm_mfence();
+ printf("%s: %u\n",label,s[0]);
+ printf("%s: %u\n",label,s[1]);
+ printf("%s: %u\n",label,s[2]);
+ printf("%s: %u\n",label,s[3]);
+ return;
+}
+
+/* For debugging of SIMD procedures: stores x to a local __m128i and prints it as 16 Count_T values on one line, prefixed by label */
+static void
+print_counts (__m128i x, char *label) {
+ __m128i a[1];
+ Count_T *s = (Count_T *) a; /* explicit cast: __m128i * does not convert implicitly */
+
+ _mm_store_si128(a,x);
+ _mm_mfence();
+ printf("%s:",label);
+ printf(" %hd",s[0]);
+ printf(" %hd",s[1]);
+ printf(" %hd",s[2]);
+ printf(" %hd",s[3]);
+ printf(" %hd",s[4]);
+ printf(" %hd",s[5]);
+ printf(" %hd",s[6]);
+ printf(" %hd",s[7]);
+ printf(" %hd",s[8]);
+ printf(" %hd",s[9]);
+ printf(" %hd",s[10]);
+ printf(" %hd",s[11]);
+ printf(" %hd",s[12]);
+ printf(" %hd",s[13]);
+ printf(" %hd",s[14]);
+ printf(" %hd",s[15]);
+ printf("\n");
+ return;
+}
+#endif
+#endif
+
+
+#if !defined(HAVE_SSE2) || defined(CHECK_ASSERTIONS)
static const Genomecomp_T reverse_nt[] =
{0x0000,0x4000,0x8000,0xC000,0x1000,0x5000,0x9000,0xD000,
0x2000,0x6000,0xA000,0xE000,0x3000,0x7000,0xB000,0xF000,
@@ -8330,6 +8391,7 @@ static const Genomecomp_T reverse_nt[] =
0x0FFF,0x4FFF,0x8FFF,0xCFFF,0x1FFF,0x5FFF,0x9FFF,0xDFFF,
0x2FFF,0x6FFF,0xAFFF,0xEFFF,0x3FFF,0x7FFF,0xBFFF,0xFFFF,
};
+#endif
@@ -8360,14 +8422,12 @@ static const Genomecomp_T reverse_nt[] =
#if defined(GSNAP)
-/* Have fewer to enable speedup. Note: Including 7-mers causes an 8x
- increase in run-time for score_querypos, and including 6-mers causes a
- 30x increase. */
-#define NOLIGOINDICES_MAJOR 1
-static int indexsizes_major[NOLIGOINDICES_MAJOR] = {8};
-static Shortoligomer_T masks_major[NOLIGOINDICES_MAJOR] = {STRAIGHT_MASK_8};
-static int diag_lookbacks_major[NOLIGOINDICES_MAJOR] = {120};
-static int suffnconsecutives_major[NOLIGOINDICES_MAJOR] = {20};
+#define NOLIGOINDICES_MAJOR 3
+static int indexsizes_major[NOLIGOINDICES_MAJOR] = {9, 8, 7};
+static Shortoligomer_T masks_major[NOLIGOINDICES_MAJOR] = {STRAIGHT_MASK_9, STRAIGHT_MASK_8, STRAIGHT_MASK_7};
+static int diag_lookbacks_major[NOLIGOINDICES_MAJOR] = {120, 60, 30};
+static int suffnconsecutives_major[NOLIGOINDICES_MAJOR] = {10, 10, 10};
+/* previously was 20, 15, 10, but with limit of 256 hits, need to be equal */
#define NOLIGOINDICES_MINOR 3
static int indexsizes_minor[NOLIGOINDICES_MINOR] = {8, 7, 6};
@@ -8379,8 +8439,8 @@ static int suffnconsecutives_minor[NOLIGOINDICES_MINOR] = {10, 10, 10};
#else
#define NOLIGOINDICES_MAJOR 3
-static int indexsizes_major[NOLIGOINDICES_MAJOR] = {8, 7, 6};
-static Shortoligomer_T masks_major[NOLIGOINDICES_MAJOR] = {STRAIGHT_MASK_8, STRAIGHT_MASK_7, STRAIGHT_MASK_6};
+static int indexsizes_major[NOLIGOINDICES_MAJOR] = {9, 8, 7};
+static Shortoligomer_T masks_major[NOLIGOINDICES_MAJOR] = {STRAIGHT_MASK_9, STRAIGHT_MASK_8, STRAIGHT_MASK_7};
static int diag_lookbacks_major[NOLIGOINDICES_MAJOR] = {120, 60, 30};
static int suffnconsecutives_major[NOLIGOINDICES_MAJOR] = {10, 10, 10};
/* previously was 20, 15, 10, but with limit of 256 hits, need to be equal */
@@ -8395,7 +8455,7 @@ static int suffnconsecutives_minor[NOLIGOINDICES_MINOR] = {10, 10, 10};
#endif
-
+#define MASK9 0x0003FFFF
#define MASK8 0x0000FFFF
#define MASK7 0x00003FFF
#define MASK6 0x00000FFF
@@ -8406,6 +8466,7 @@ static Mode_T mode;
#ifdef USE_SIMD_FOR_COUNTS
+static __m128i mask9;
static __m128i mask8;
static __m128i mask7;
static __m128i mask6;
@@ -8418,11 +8479,17 @@ Oligoindex_hr_setup (Genomecomp_T *ref_blocks_in, Mode_T mode_in) {
ref_blocks = ref_blocks_in;
mode = mode_in;
#ifdef USE_SIMD_FOR_COUNTS
+ mask9 = _mm_set1_epi32(262143U);
mask8 = _mm_set1_epi32(65535U);
mask7 = _mm_set1_epi32(16383U);
mask6 = _mm_set1_epi32(4095U);
mask5 = _mm_set1_epi32(1023U);
#endif
+
+#ifdef DEBUG14
+ Oligoindex_old_setup(ref_blocks_in,mode_in);
+#endif
+
return;
}
@@ -8466,7 +8533,7 @@ Oligoindex_new (int indexsize, int diag_lookback, int suffnconsecutive
new->diag_lookback = diag_lookback;
new->suffnconsecutive = suffnconsecutive;
- new->query_evaluated_p = false;
+ /* new->query_evaluated_p = false; */
#ifdef HAVE_SSE2
new->inquery_allocated = (__m128i *) _mm_malloc(new->oligospace * sizeof(Count_T),16);
new->counts_allocated = (__m128i *) _mm_malloc(new->oligospace * sizeof(Count_T),16);
@@ -8480,7 +8547,7 @@ Oligoindex_new (int indexsize, int diag_lookback, int suffnconsecutive
#endif
#ifdef HAVE_SSE2
- memset((void *) new->inquery,/*false*/0x00,new->oligospace*sizeof(Count_T));
+ memset((void *) new->inquery,INQUERY_FALSE,new->oligospace*sizeof(Count_T));
#else
memset((void *) new->inquery,false,new->oligospace*sizeof(bool));
#endif
@@ -8489,10 +8556,11 @@ Oligoindex_new (int indexsize, int diag_lookback, int suffnconsecutive
#ifdef PMAP
new->relevant_counts = (int *) CALLOC(new->oligospace,sizeof(int));
- new->overabundant = (bool *) CALLOC(new->oligospace,sizeof(bool));
#endif
- new->positions = (Chrpos_T **) CALLOC(new->oligospace+1,sizeof(Chrpos_T *));
- new->pointers = (Chrpos_T **) CALLOC(new->oligospace,sizeof(Chrpos_T *));
+ new->pointers_allocated = (Chrpos_T **) MALLOC((new->oligospace+1) * sizeof(Chrpos_T *));
+ new->pointers = &(new->pointers_allocated[1]);
+ new->positions_space = (Chrpos_T *) NULL;
+ new->positions = (Chrpos_T **) MALLOC(new->oligospace * sizeof(Chrpos_T *));
return new;
}
@@ -8644,7 +8712,7 @@ Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T end
/* 87654321 */
#define LOW_TWO_BITS 0x00000003
-#if defined(DEBUG) || defined(DEBUG9)
+#if defined(DEBUG) || defined(DEBUG9) || defined(DEBUG14)
static char *
shortoligo_nt (Shortoligomer_T oligo, int oligosize) {
char *nt;
@@ -8669,19 +8737,25 @@ shortoligo_nt (Shortoligomer_T oligo, int oligosize) {
}
#endif
-#ifdef DEBUG9
+#ifdef DEBUG
static void
-dump_positions (Chrpos_T **positions, Count_T *counts, int oligospace, int indexsize) {
+dump_allocations (Chrpos_T **positions, Count_T *counts, int oligospace, int indexsize,
+ Chrpos_T *positions_space) {
int i;
char *nt;
+ Chrpos_T *lastptr = positions_space;
- printf("Entered dump_positions with oligospace %d\n",oligospace);
+ printf("Entered dump_allocations with oligospace %d\n",oligospace);
for (i = 0; i < oligospace; i++) {
nt = shortoligo_nt(i,indexsize);
- if (counts[i] >= 1) {
- printf("Oligo_hr %s => %d entries: %u...%u\n",
- nt,counts[i],positions[i][0],positions[i][counts[i]-1]);
+ if (counts[i] == 0) {
+ printf("Oligo_hr %s (%llu) => %u entries\n",
+ nt,(unsigned long long) i,counts[i]);
+ } else {
+ printf("Oligo_hr %s (%llu) => %u entries: allocation %p (%d entries)\n",
+ nt,(unsigned long long) i,counts[i],positions[i],positions[i] - lastptr);
+ lastptr = positions[i];
}
FREE(nt);
}
@@ -8690,21 +8764,59 @@ dump_positions (Chrpos_T **positions, Count_T *counts, int oligospace, int index
}
#endif
+#if defined(DEBUG) || defined(DEBUG9)
+static void
+dump_positions (Chrpos_T **positions, Count_T *counts, Count_T *inquery, int oligospace, int indexsize) {
+ int i;
+ char *nt;
+
+ printf("Entered dump_positions new with oligospace %d\n",oligospace);
+
+ for (i = 0; i < oligospace; i++) {
+ if (inquery[i] == INQUERY_TRUE) {
+ nt = shortoligo_nt(i,indexsize);
+ if (counts[i] == 0) {
+ printf("Oligo_hr %s => 0 entries\n",nt);
+ } else {
+ printf("Oligo_hr %s => %d entries: %u...%u\n",
+ nt,counts[i],positions[i][0],positions[i][counts[i]-1]);
+ }
+ FREE(nt);
+ }
+ }
+
+ return;
+}
+#endif
+
+
/************************************************************************
* Counting and storage procedures. We count the number of
* occurrences of each oligomer in the genomic region, modulo 256
* (because Count_T is an unsigned char). The allocate_positions
- * procedure then assigns pointers (which advance) and positions
+ * procedure then assigns pointers_end (which start at the end of
+ * each positions block and go backward) and positions
* (which stay fixed) based on those counts, except that oligomers
- * not in the query sequence have their counts set to 0, and no
+ * not in the query sequence have their counts set to 0, and have no
* space allocated. However, during storage, if a pointer hits the
- * next position, that must mean that the count cycled past 255. We
- * set that count to be 0, so that oligomer is not used by
+ * beginning of the position block, that must mean that the count cycled
+ * past 255. We set that count to be 0, so that oligomer is not used by
* Oligomer_get_mappings. A count greater that 255 is overabundant
- * and not useful in stage 2. We would normally check whether
- * pointers[masked] == positions[masked+1], but by providing
- * &(positions[1]), we can check pointers[masked] ==
- * positions[masked] instead.
+ * and not useful in stage 2.
+ ************************************************************************/
+
+/************************************************************************
+ * Use SIMD to process 64 k-mers at a time:
+ * extract_*mers_{fwd|rev}_simd
+ * count_fwdrev_simd
+ * store_fwdrev_simd
+ *
+ * Use a special procedure to compute an odd block of 32 k-mers
+ * count_*mers_{fwd|rev}
+ * This procedure can use SIMD if we compute backwards
+ *
+ * Use a slow procedure to compute the start and end blocks
+ * count_*mers_{fwd|rev}_partial
************************************************************************/
@@ -8713,231 +8825,239 @@
************************************************************************/
+/* Counts the 9-mers at positions startdiscard..enddiscard of one block,
+   walking backward from enddiscard.  Each loop covers the positions whose
+   9-mer is read from the word(s) it shifts; every position visited adds 1 to counts. */
static void
-count_8mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
+count_9mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
int startdiscard, int enddiscard) {
Genomecomp_T masked;
int pos;
- pos = startdiscard;
- while (pos <= enddiscard && pos <= 8) {
- masked = high_rev >> (16 - 2*pos);
- masked &= MASK8;
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 24) {
+ masked = nexthigh_rev >> (78 - 2*pos);
+ masked |= low_rev << (2*pos - 46);
+ masked &= MASK9;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
- while (pos <= enddiscard && pos <= 15) {
- masked = low_rev >> (48 - 2*pos);
- masked |= high_rev << (2*pos - 16);
- masked &= MASK8;
+ while (pos >= startdiscard && pos >= 16) {
+ masked = low_rev >> (46 - 2*pos);
+ masked &= MASK9;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
- while (pos <= enddiscard && pos <= 24) {
- masked = low_rev >> (48 - 2*pos);
- masked &= MASK8;
+ while (pos >= startdiscard && pos >= 8) {
+ masked = low_rev >> (46 - 2*pos);
+ masked |= high_rev << (2*pos - 14);
+ masked &= MASK9;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 31) {
- masked = nexthigh_rev >> (80 - 2*pos);
- masked |= low_rev << (2*pos - 48);
- masked &= MASK8;
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (14 - 2*pos);
+ masked &= MASK9;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
return;
}
static int
-store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
int startdiscard, int enddiscard) {
Genomecomp_T masked;
int pos;
- pos = startdiscard;
- while (pos <= enddiscard && pos <= 8) {
- masked = high_rev >> (16 - 2*pos);
- masked &= MASK8;
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 24) {
+ masked = nexthigh_rev >> (78 - 2*pos);
+ masked |= low_rev << (2*pos - 46);
+ masked &= MASK9;
debug(printf("%d %04X\n",pos,masked));
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
- while (pos <= enddiscard && pos <= 15) {
- masked = low_rev >> (48 - 2*pos);
- masked |= high_rev << (2*pos - 16);
- masked &= MASK8;
+ while (pos >= startdiscard && pos >= 16) {
+ masked = low_rev >> (46 - 2*pos);
+ masked &= MASK9;
debug(printf("%d %04X\n",pos,masked));
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 24) {
- masked = low_rev >> (48 - 2*pos);
- masked &= MASK8;
+
+ while (pos >= startdiscard && pos >= 8) {
+ masked = low_rev >> (46 - 2*pos);
+ masked |= high_rev << (2*pos - 14);
+ masked &= MASK9;
debug(printf("%d %04X\n",pos,masked));
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 31) {
- masked = nexthigh_rev >> (80 - 2*pos);
- masked |= low_rev << (2*pos - 48);
- masked &= MASK8;
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (14 - 2*pos);
+ masked &= MASK9;
debug(printf("%d %04X\n",pos,masked));
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
return chrpos;
}
+
static void
-count_7mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
+count_8mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
int startdiscard, int enddiscard) {
Genomecomp_T masked;
int pos;
- pos = startdiscard;
- while (pos <= enddiscard && pos <= 9) {
- masked = high_rev >> (18 - 2*pos);
- masked &= MASK7;
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 25) {
+ masked = nexthigh_rev >> (80 - 2*pos);
+ masked |= low_rev << (2*pos - 48);
+ masked &= MASK8;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
- while (pos <= enddiscard && pos <= 15) {
- masked = low_rev >> (50 - 2*pos);
- masked |= high_rev << (2*pos - 18);
- masked &= MASK7;
+ while (pos >= startdiscard && pos >= 16) {
+ masked = low_rev >> (48 - 2*pos);
+ masked &= MASK8;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
- while (pos <= enddiscard && pos <= 25) {
- masked = low_rev >> (50 - 2*pos);
- masked &= MASK7;
+ while (pos >= startdiscard && pos >= 9) {
+ masked = low_rev >> (48 - 2*pos);
+ masked |= high_rev << (2*pos - 16);
+ masked &= MASK8;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 31) {
- masked = nexthigh_rev >> (82 - 2*pos);
- masked |= low_rev << (2*pos - 50);
- masked &= MASK7;
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (16 - 2*pos);
+ masked &= MASK8;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
return;
}
static int
-store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
int startdiscard, int enddiscard) {
Genomecomp_T masked;
int pos;
- pos = startdiscard;
- while (pos <= enddiscard && pos <= 9) {
- masked = high_rev >> (18 - 2*pos);
- masked &= MASK7;
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 25) {
+ masked = nexthigh_rev >> (80 - 2*pos);
+ masked |= low_rev << (2*pos - 48);
+ masked &= MASK8;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
- while (pos <= enddiscard && pos <= 15) {
- masked = low_rev >> (50 - 2*pos);
- masked |= high_rev << (2*pos - 18);
- masked &= MASK7;
+ while (pos >= startdiscard && pos >= 16) {
+ masked = low_rev >> (48 - 2*pos);
+ masked &= MASK8;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 25) {
- masked = low_rev >> (50 - 2*pos);
- masked &= MASK7;
+
+ while (pos >= startdiscard && pos >= 9) {
+ masked = low_rev >> (48 - 2*pos);
+ masked |= high_rev << (2*pos - 16);
+ masked &= MASK8;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 31) {
- masked = nexthigh_rev >> (82 - 2*pos);
- masked |= low_rev << (2*pos - 50);
- masked &= MASK7;
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (16 - 2*pos);
+ masked &= MASK8;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
return chrpos;
@@ -8945,114 +9061,228 @@ store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positi
static void
-count_6mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
+count_7mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
int startdiscard, int enddiscard) {
Genomecomp_T masked;
int pos;
- pos = startdiscard;
- while (pos <= enddiscard && pos <= 10) {
- masked = high_rev >> (20 - 2*pos);
- masked &= MASK6;
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 26) {
+ masked = nexthigh_rev >> (82 - 2*pos);
+ masked |= low_rev << (2*pos - 50);
+ masked &= MASK7;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
- while (pos <= enddiscard && pos <= 15) {
- masked = low_rev >> (52 - 2*pos);
- masked |= high_rev << (2*pos - 20);
- masked &= MASK6;
+ while (pos >= startdiscard && pos >= 16) {
+ masked = low_rev >> (50 - 2*pos);
+ masked &= MASK7;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
- while (pos <= enddiscard && pos <= 26) {
- masked = low_rev >> (52 - 2*pos);
- masked &= MASK6;
+ while (pos >= startdiscard && pos >= 10) {
+ masked = low_rev >> (50 - 2*pos);
+ masked |= high_rev << (2*pos - 18);
+ masked &= MASK7;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 31) {
- masked = nexthigh_rev >> (84 - 2*pos);
- masked |= low_rev << (2*pos - 52);
- masked &= MASK6;
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (18 - 2*pos);
+ masked &= MASK7;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
-
+
return;
}
-
static int
-store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
int startdiscard, int enddiscard) {
Genomecomp_T masked;
int pos;
- pos = startdiscard;
- while (pos <= enddiscard && pos <= 10) {
- masked = high_rev >> (20 - 2*pos);
- masked &= MASK6;
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 26) {
+ masked = nexthigh_rev >> (82 - 2*pos);
+ masked |= low_rev << (2*pos - 50);
+ masked &= MASK7;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
- while (pos <= enddiscard && pos <= 15) {
+ while (pos >= startdiscard && pos >= 16) {
+ masked = low_rev >> (50 - 2*pos);
+ masked &= MASK7;
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+ chrpos--;
+ pos--;
+ }
+
+ while (pos >= startdiscard && pos >= 10) {
+ masked = low_rev >> (50 - 2*pos);
+ masked |= high_rev << (2*pos - 18);
+ masked &= MASK7;
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+ chrpos--;
+ pos--;
+ }
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (18 - 2*pos);
+ masked &= MASK7;
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+ chrpos--;
+ pos--;
+ }
+
+ return chrpos;
+}
+
+
+static void
+count_6mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
+ int startdiscard, int enddiscard) {
+ Genomecomp_T masked;
+ int pos;
+
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 27) {
+ masked = nexthigh_rev >> (84 - 2*pos);
+ masked |= low_rev << (2*pos - 52);
+ masked &= MASK6;
+ counts[masked] += 1;
+ debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
+ pos--;
+ }
+
+ while (pos >= startdiscard && pos >= 16) {
+ masked = low_rev >> (52 - 2*pos);
+ masked &= MASK6;
+ counts[masked] += 1;
+ debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
+ pos--;
+ }
+
+ while (pos >= startdiscard && pos >= 11) {
masked = low_rev >> (52 - 2*pos);
masked |= high_rev << (2*pos - 20);
masked &= MASK6;
+ counts[masked] += 1;
+ debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
+ pos--;
+ }
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (20 - 2*pos);
+ masked &= MASK6;
+ counts[masked] += 1;
+ debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
+ pos--;
+ }
+
+ return;
+}
+
+
+static int
+store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+ Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
+ int startdiscard, int enddiscard) {
+ Genomecomp_T masked;
+ int pos;
+
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 27) {
+ masked = nexthigh_rev >> (84 - 2*pos);
+ masked |= low_rev << (2*pos - 52);
+ masked &= MASK6;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
- while (pos <= enddiscard && pos <= 26) {
+ while (pos >= startdiscard && pos >= 16) {
masked = low_rev >> (52 - 2*pos);
masked &= MASK6;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
- while (pos <= enddiscard && pos <= 31) {
- masked = nexthigh_rev >> (84 - 2*pos);
- masked |= low_rev << (2*pos - 52);
+ while (pos >= startdiscard && pos >= 11) {
+ masked = low_rev >> (52 - 2*pos);
+ masked |= high_rev << (2*pos - 20);
masked &= MASK6;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
+ }
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (20 - 2*pos);
+ masked &= MASK6;
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+ chrpos--;
+ pos--;
}
return chrpos;
@@ -9065,39 +9295,39 @@ count_5mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T lo
Genomecomp_T masked;
int pos;
- pos = startdiscard;
- while (pos <= enddiscard && pos <= 11) {
- masked = high_rev >> (22 - 2*pos);
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 28) {
+ masked = nexthigh_rev >> (86 - 2*pos);
+ masked |= low_rev << (2*pos - 54);
masked &= MASK5;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
- while (pos <= enddiscard && pos <= 15) {
+ while (pos >= startdiscard && pos >= 16) {
masked = low_rev >> (54 - 2*pos);
- masked |= high_rev << (2*pos - 22);
masked &= MASK5;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 27) {
+
+ while (pos >= startdiscard && pos >= 12) {
masked = low_rev >> (54 - 2*pos);
+ masked |= high_rev << (2*pos - 22);
masked &= MASK5;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 31) {
- masked = nexthigh_rev >> (86 - 2*pos);
- masked |= low_rev << (2*pos - 54);
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (22 - 2*pos);
masked &= MASK5;
counts[masked] += 1;
debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos++;
+ pos--;
}
return;
@@ -9111,264 +9341,134 @@ store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positi
Genomecomp_T masked;
int pos;
- pos = startdiscard;
- while (pos <= enddiscard && pos <= 11) {
- masked = high_rev >> (22 - 2*pos);
+ pos = enddiscard;
+ while (pos >= startdiscard && pos >= 28) {
+ masked = nexthigh_rev >> (86 - 2*pos);
+ masked |= low_rev << (2*pos - 54);
masked &= MASK5;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
- while (pos <= enddiscard && pos <= 15) {
+ while (pos >= startdiscard && pos >= 16) {
masked = low_rev >> (54 - 2*pos);
- masked |= high_rev << (2*pos - 22);
masked &= MASK5;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 27) {
+
+ while (pos >= startdiscard && pos >= 12) {
masked = low_rev >> (54 - 2*pos);
+ masked |= high_rev << (2*pos - 22);
masked &= MASK5;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
-
- while (pos <= enddiscard && pos <= 31) {
- masked = nexthigh_rev >> (86 - 2*pos);
- masked |= low_rev << (2*pos - 54);
+
+ while (pos >= startdiscard) {
+ masked = high_rev >> (22 - 2*pos);
masked &= MASK5;
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- chrpos++;
- pos++;
+ chrpos--;
+ pos--;
}
return chrpos;
}
-static void
-count_8mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
- Genomecomp_T masked, oligo;
-
- masked = high_rev >> 16; /* 0, No mask necessary */
- counts[masked] += 1;
- debug(printf("0 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 14) & MASK8; /* 1 */
- counts[masked] += 1;
- debug(printf("1 %04X => %d\n",masked,counts[masked]));
+#if 0
+/* Note: for AVX2 and AVX512 */
+/* Variable bit shift right logical (VPSRLVD/Q) */
+_varcount is 16, 14, 12, 10, 8, 6, 4, 2 in eight 32-bit quantities in __m256i
+_high_rev is broadcast in eight 32-bit quantities in __m256i
+
+ _mm256_srlv_epi32(_high_rev,_varcount);
+ Then need to mask
+ (Gather in AVX2)
+ (Scatter in AVX-512)
+#endif
- masked = (high_rev >> 12) & MASK8; /* 2 */
- counts[masked] += 1;
- debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 10) & MASK8; /* 3 */
- counts[masked] += 1;
- debug(printf("3 %04X => %d\n",masked,counts[masked]));
+#if 0
+ /* Replaced by individual count_*mer_{fwd|rev}_simd procedures */
+ /* array is filled by extract_*mers_{fwd|rev}_simd */
+ /* Fwd and rev procedures differ only in the order of indices */
+static void
+count_fwdrev_simd (Count_T *counts, UINT4 *array) {
+ UINT4 *ptr;
- masked = (high_rev >> 8) & MASK8; /* 4 */
- counts[masked] += 1;
- debug(printf("4 %04X => %d\n",masked,counts[masked]));
+ /* Fwd: Starts with 0 because we used _setr_ and not _set_ */
+ /* Rev: Starts with 63 because we used _set_ and not _setr_ */
+ ptr = &(array[0]);
+ debug(printf("Fwd: 0 %04X, 16 %04X, 32 %04X, 48 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
+ debug(printf("Rev: 63 %04X, 47 %04X, 31 %04X, 15 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
+ counts[*ptr++] += 1; /* 0 */ /* 63 */
+ counts[*ptr++] += 1; /* 16 */ /* 47 */
+ counts[*ptr++] += 1; /* 32 */ /* 31 */
+ counts[*ptr++] += 1; /* 48 */ /* 15 */
- masked = (high_rev >> 6) & MASK8; /* 5 */
- counts[masked] += 1;
- debug(printf("5 %04X => %d\n",masked,counts[masked]));
+ debug(printf("Fwd: 1 %04X, 17 %04X, 33 %04X, 49 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
+ debug(printf("Rev: 62 %04X, 46 %04X, 30 %04X, 14 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
+ counts[*ptr++] += 1; /* 1 */ /* 62 */
+ counts[*ptr++] += 1; /* 17 */ /* 46 */
+ counts[*ptr++] += 1; /* 33 */ /* 30 */
+ counts[*ptr++] += 1; /* 49 */ /* 14 */
- masked = (high_rev >> 4) & MASK8; /* 6 */
- counts[masked] += 1;
- debug(printf("6 %04X => %d\n",masked,counts[masked]));
+ debug(printf("Fwd: 2 %04X, 18 %04X, 34 %04X, 50 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
+ debug(printf("Rev: 61 %04X, 45 %04X, 29 %04X, 13 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
+ counts[*ptr++] += 1; /* 2 */ /* 61 */
+ counts[*ptr++] += 1; /* 18 */ /* 45 */
+ counts[*ptr++] += 1; /* 34 */ /* 29 */
+ counts[*ptr++] += 1; /* 50 */ /* 13 */
- masked = (high_rev >> 2) & MASK8; /* 7 */
- counts[masked] += 1;
- debug(printf("7 %04X => %d\n",masked,counts[masked]));
+ debug(printf("Fwd: 3 %04X, 19 %04X, 35 %04X, 51 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
+ debug(printf("Rev: 60 %04X, 44 %04X, 28 %04X, 12 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
+ counts[*ptr++] += 1; /* 3 */ /* 60 */
+ counts[*ptr++] += 1; /* 19 */ /* 44 */
+ counts[*ptr++] += 1; /* 35 */ /* 28 */
+ counts[*ptr++] += 1; /* 51 */ /* 12 */
- masked = high_rev & MASK8; /* 8 */
- counts[masked] += 1;
- debug(printf("8 %04X => %d\n",masked,counts[masked]));
+ debug(printf("Fwd: 4 %04X, 20 %04X, 36 %04X, 52 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
+ debug(printf("Rev: 59 %04X, 43 %04X, 27 %04X, 11 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
+ counts[*ptr++] += 1; /* 4 */ /* 59 */
+ counts[*ptr++] += 1; /* 20 */ /* 43 */
+ counts[*ptr++] += 1; /* 36 */ /* 27 */
+ counts[*ptr++] += 1; /* 52 */ /* 11 */
-
- oligo = low_rev >> 18; /* For 9..15 */
- oligo |= high_rev << 14;
-
- masked = (oligo >> 12) & MASK8; /* 9 */
- counts[masked] += 1;
- debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 10) & MASK8; /* 10 */
- counts[masked] += 1;
- debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 8) & MASK8; /* 11 */
- counts[masked] += 1;
- debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 6) & MASK8; /* 12 */
- counts[masked] += 1;
- debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 4) & MASK8; /* 13 */
- counts[masked] += 1;
- debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 2) & MASK8; /* 14 */
- counts[masked] += 1;
- debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
- masked = oligo & MASK8; /* 15 */
- counts[masked] += 1;
- debug(printf("15 %04X => %d\n",masked,counts[masked]));
-
- masked = low_rev >> 16; /* 16, No mask necessary */
- counts[masked] += 1;
- debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 14) & MASK8; /* 17 */
- counts[masked] += 1;
- debug(printf("17 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 12) & MASK8; /* 18 */
- counts[masked] += 1;
- debug(printf("18 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 10) & MASK8; /* 19 */
- counts[masked] += 1;
- debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 8) & MASK8; /* 20 */
- counts[masked] += 1;
- debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 6) & MASK8; /* 21 */
- counts[masked] += 1;
- debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 4) & MASK8; /* 22 */
- counts[masked] += 1;
- debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 2) & MASK8; /* 23 */
- counts[masked] += 1;
- debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
- masked = low_rev & MASK8; /* 24 */
- counts[masked] += 1;
- debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
-
- oligo = nexthigh_rev >> 18; /* For 25..31 */
- oligo |= low_rev << 14;
-
- masked = (oligo >> 12) & MASK8; /* 25 */
- counts[masked] += 1;
- debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 10) & MASK8; /* 26 */
- counts[masked] += 1;
- debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 8) & MASK8; /* 27 */
- counts[masked] += 1;
- debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 6) & MASK8; /* 28 */
- counts[masked] += 1;
- debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 4) & MASK8; /* 29 */
- counts[masked] += 1;
- debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 2) & MASK8; /* 30 */
- counts[masked] += 1;
- debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
- masked = oligo & MASK8; /* 31 */
- counts[masked] += 1;
- debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
- return;
-}
-
-
-
-#ifdef USE_SIMD_FOR_COUNTS
-/* Fwd and rev procedures differ only in the order of indices */
-static void
-count_fwdrev_simd (Count_T *counts, UINT4 *array) {
- UINT4 *ptr;
-
- /* Fwd: Starts with 0 because we used _setr_ and not _set_ */
- /* Rev: Starts with 63 because we used _set_ and not _setr_ */
- ptr = &(array[0]);
- debug(printf("Fwd: 0 %04X, 16 %04X, 32 %04X, 48 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
- debug(printf("Rev: 63 %04X, 47 %04X, 31 %04X, 15 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
- counts[*ptr++] += 1; /* 0 */ /* 63 */
- counts[*ptr++] += 1; /* 16 */ /* 47 */
- counts[*ptr++] += 1; /* 32 */ /* 31 */
- counts[*ptr++] += 1; /* 48 */ /* 15 */
-
- debug(printf("Fwd: 1 %04X, 17 %04X, 33 %04X, 49 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
- debug(printf("Rev: 62 %04X, 46 %04X, 30 %04X, 14 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
- counts[*ptr++] += 1; /* 1 */ /* 62 */
- counts[*ptr++] += 1; /* 17 */ /* 46 */
- counts[*ptr++] += 1; /* 33 */ /* 30 */
- counts[*ptr++] += 1; /* 49 */ /* 14 */
-
- debug(printf("Fwd: 2 %04X, 18 %04X, 34 %04X, 50 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
- debug(printf("Rev: 61 %04X, 45 %04X, 29 %04X, 13 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
- counts[*ptr++] += 1; /* 2 */ /* 61 */
- counts[*ptr++] += 1; /* 18 */ /* 45 */
- counts[*ptr++] += 1; /* 34 */ /* 29 */
- counts[*ptr++] += 1; /* 50 */ /* 13 */
-
- debug(printf("Fwd: 3 %04X, 19 %04X, 35 %04X, 51 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
- debug(printf("Rev: 60 %04X, 44 %04X, 28 %04X, 12 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
- counts[*ptr++] += 1; /* 3 */ /* 60 */
- counts[*ptr++] += 1; /* 19 */ /* 44 */
- counts[*ptr++] += 1; /* 35 */ /* 28 */
- counts[*ptr++] += 1; /* 51 */ /* 12 */
-
- debug(printf("Fwd: 4 %04X, 20 %04X, 36 %04X, 52 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
- debug(printf("Rev: 59 %04X, 43 %04X, 27 %04X, 11 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
- counts[*ptr++] += 1; /* 4 */ /* 59 */
- counts[*ptr++] += 1; /* 20 */ /* 43 */
- counts[*ptr++] += 1; /* 36 */ /* 27 */
- counts[*ptr++] += 1; /* 52 */ /* 11 */
-
- debug(printf("Fwd: 5 %04X, 21 %04X, 37 %04X, 53 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
- debug(printf("Rev: 58 %04X, 42 %04X, 26 %04X, 10 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
- counts[*ptr++] += 1; /* 5 */ /* 58 */
- counts[*ptr++] += 1; /* 21 */ /* 42 */
- counts[*ptr++] += 1; /* 37 */ /* 26 */
- counts[*ptr++] += 1; /* 53 */ /* 10 */
+ debug(printf("Fwd: 5 %04X, 21 %04X, 37 %04X, 53 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
+ debug(printf("Rev: 58 %04X, 42 %04X, 26 %04X, 10 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
+ counts[*ptr++] += 1; /* 5 */ /* 58 */
+ counts[*ptr++] += 1; /* 21 */ /* 42 */
+ counts[*ptr++] += 1; /* 37 */ /* 26 */
+ counts[*ptr++] += 1; /* 53 */ /* 10 */
debug(printf("Fwd: 6 %04X, 22 %04X, 38 %04X, 54 %04X || ",ptr[0],ptr[1],ptr[2],ptr[3]));
debug(printf("Rev: 57 %04X, 41 %04X, 25 %04X, 9 %04X\n",ptr[0],ptr[1],ptr[2],ptr[3]));
@@ -9454,1103 +9554,1107 @@ store_fwdrev_simd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, C
UINT4 *array) {
Genomecomp_T masked;
- /* Row 0 */
- masked = array[0];
+ /* Row 4 */
+ masked = array[63];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos));
+ *(--pointers[masked]) = chrpos;
}
}
- masked = array[4];
+ masked = array[59];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 1;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+ *(--pointers[masked]) = chrpos - 1;
}
}
- masked = array[8];
+ masked = array[55];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 2;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+ *(--pointers[masked]) = chrpos - 2;
}
}
- masked = array[12];
+ masked = array[51];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 3;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+ *(--pointers[masked]) = chrpos - 3;
}
}
- masked = array[16];
+ masked = array[47];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 4;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+ *(--pointers[masked]) = chrpos - 4;
}
}
- masked = array[20];
+ masked = array[43];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 5;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+ *(--pointers[masked]) = chrpos - 5;
}
}
- masked = array[24];
+ masked = array[39];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 6;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+ *(--pointers[masked]) = chrpos - 6;
}
}
- masked = array[28];
+ masked = array[35];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 7;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+ *(--pointers[masked]) = chrpos - 7;
}
}
- masked = array[32];
+ masked = array[31];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 8;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+ *(--pointers[masked]) = chrpos - 8;
}
}
- masked = array[36];
+ masked = array[27];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 9;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+ *(--pointers[masked]) = chrpos - 9;
}
}
- masked = array[40];
+ masked = array[23];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 10;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+ *(--pointers[masked]) = chrpos - 10;
}
}
- masked = array[44];
+ masked = array[19];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 11;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+ *(--pointers[masked]) = chrpos - 11;
}
}
- masked = array[48];
+ masked = array[15];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 12;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+ *(--pointers[masked]) = chrpos - 12;
}
}
- masked = array[52];
+ masked = array[11];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 13;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+ *(--pointers[masked]) = chrpos - 13;
}
}
- masked = array[56];
+ masked = array[7];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 14;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+ *(--pointers[masked]) = chrpos - 14;
}
}
- masked = array[60];
+ masked = array[3];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 15;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+ *(--pointers[masked]) = chrpos - 15;
}
}
- /* Row 1 */
- masked = array[1];
+
+ /* Row 3 */
+ masked = array[62];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 16;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+ *(--pointers[masked]) = chrpos - 16;
}
}
- masked = array[5];
+ masked = array[58];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 17;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+ *(--pointers[masked]) = chrpos - 17;
}
}
- masked = array[9];
+ masked = array[54];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 18;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+ *(--pointers[masked]) = chrpos - 18;
}
}
- masked = array[13];
+ masked = array[50];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 19;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+ *(--pointers[masked]) = chrpos - 19;
}
}
- masked = array[17];
+ masked = array[46];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 20;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+ *(--pointers[masked]) = chrpos - 20;
}
}
- masked = array[21];
+ masked = array[42];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 21;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+ *(--pointers[masked]) = chrpos - 21;
}
}
- masked = array[25];
+ masked = array[38];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 22;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+ *(--pointers[masked]) = chrpos - 22;
}
}
- masked = array[29];
+ masked = array[34];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 23;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+ *(--pointers[masked]) = chrpos - 23;
}
}
- masked = array[33];
+ masked = array[30];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 24;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+ *(--pointers[masked]) = chrpos - 24;
}
}
- masked = array[37];
+ masked = array[26];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 25;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+ *(--pointers[masked]) = chrpos - 25;
}
}
- masked = array[41];
+ masked = array[22];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 26;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+ *(--pointers[masked]) = chrpos - 26;
}
}
- masked = array[45];
+ masked = array[18];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 27;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+ *(--pointers[masked]) = chrpos - 27;
}
}
- masked = array[49];
+ masked = array[14];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 28;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+ *(--pointers[masked]) = chrpos - 28;
}
}
- masked = array[53];
+ masked = array[10];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 29;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+ *(--pointers[masked]) = chrpos - 29;
}
}
- masked = array[57];
+ masked = array[6];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 30;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+ *(--pointers[masked]) = chrpos - 30;
}
}
- masked = array[61];
+ masked = array[2];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 31;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+ *(--pointers[masked]) = chrpos - 31;
}
}
- /* Row 2 */
- masked = array[2];
+ /* Row 1 */
+ masked = array[61];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 32;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 32));
+ *(--pointers[masked]) = chrpos - 32;
}
}
- masked = array[6];
+ masked = array[57];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 33;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 33));
+ *(--pointers[masked]) = chrpos - 33;
}
}
- masked = array[10];
+ masked = array[53];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 34;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 34));
+ *(--pointers[masked]) = chrpos - 34;
}
}
- masked = array[14];
+ masked = array[49];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 35;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 35));
+ *(--pointers[masked]) = chrpos - 35;
}
}
- masked = array[18];
+ masked = array[45];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 36;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 36));
+ *(--pointers[masked]) = chrpos - 36;
}
}
- masked = array[22];
+ masked = array[41];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 37;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 37));
+ *(--pointers[masked]) = chrpos - 37;
}
}
- masked = array[26];
+ masked = array[37];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 38;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 38));
+ *(--pointers[masked]) = chrpos - 38;
}
}
- masked = array[30];
+ masked = array[33];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 39;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 39));
+ *(--pointers[masked]) = chrpos - 39;
}
}
- masked = array[34];
+ masked = array[29];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 40;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 40));
+ *(--pointers[masked]) = chrpos - 40;
}
}
- masked = array[38];
+ masked = array[25];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 41;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 41));
+ *(--pointers[masked]) = chrpos - 41;
}
}
- masked = array[42];
+ masked = array[21];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 42;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 42));
+ *(--pointers[masked]) = chrpos - 42;
}
}
- masked = array[46];
+ masked = array[17];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 43;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 43));
+ *(--pointers[masked]) = chrpos - 43;
}
}
- masked = array[50];
+ masked = array[13];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 44;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 44));
+ *(--pointers[masked]) = chrpos - 44;
}
}
- masked = array[54];
+ masked = array[9];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 45;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 45));
+ *(--pointers[masked]) = chrpos - 45;
}
}
- masked = array[58];
+ masked = array[5];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 46;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 46));
+ *(--pointers[masked]) = chrpos - 46;
}
}
- masked = array[62];
+ masked = array[1];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 47;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 47));
+ *(--pointers[masked]) = chrpos - 47;
}
}
- /* Row 3 */
- masked = array[3];
+ /* Row 0 */
+ masked = array[60];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 48;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 48));
+ *(--pointers[masked]) = chrpos - 48;
}
}
- masked = array[7];
+ masked = array[56];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 49;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 49));
+ *(--pointers[masked]) = chrpos - 49;
}
}
- masked = array[11];
+ masked = array[52];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 50;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 50));
+ *(--pointers[masked]) = chrpos - 50;
}
}
- masked = array[15];
+ masked = array[48];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 51;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 51));
+ *(--pointers[masked]) = chrpos - 51;
}
}
- masked = array[19];
+ masked = array[44];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 52;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 52));
+ *(--pointers[masked]) = chrpos - 52;
}
}
- masked = array[23];
+ masked = array[40];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 53;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 53));
+ *(--pointers[masked]) = chrpos - 53;
}
}
- masked = array[27];
+ masked = array[36];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 54;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 54));
+ *(--pointers[masked]) = chrpos - 54;
}
}
- masked = array[31];
+ masked = array[32];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 55;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 55));
+ *(--pointers[masked]) = chrpos - 55;
}
}
- masked = array[35];
+ masked = array[28];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 56;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 56));
+ *(--pointers[masked]) = chrpos - 56;
}
}
- masked = array[39];
+ masked = array[24];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 57;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 57));
+ *(--pointers[masked]) = chrpos - 57;
}
}
- masked = array[43];
+ masked = array[20];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 58;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 58));
+ *(--pointers[masked]) = chrpos - 58;
}
}
- masked = array[47];
+ masked = array[16];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 59;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 59));
+ *(--pointers[masked]) = chrpos - 59;
}
}
- masked = array[51];
+ masked = array[12];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 60;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 60));
+ *(--pointers[masked]) = chrpos - 60;
}
}
- masked = array[55];
+ masked = array[8];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 61;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 61));
+ *(--pointers[masked]) = chrpos - 61;
}
}
- masked = array[59];
+ masked = array[4];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 62;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 62));
+ *(--pointers[masked]) = chrpos - 62;
}
}
- masked = array[63];
+ masked = array[0];
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 63;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 63));
+ *(--pointers[masked]) = chrpos - 63;
}
}
- return chrpos + 64;
+ return chrpos - 64;
}
#endif
-/* Expecting current to have {high0_rev, low0_rev, high1_rev,
- low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and
- high2_rev} */
-#ifdef USE_SIMD_FOR_COUNTS
static void
-extract_8mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
- __m128i oligo;
+count_9mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
- _mm_store_si128(out++, _mm_srli_epi32(current,16)); /* No mask necessary */
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
- _mm_store_si128(out++, _mm_and_si128( current, mask8));
- oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
- _mm_store_si128(out++, _mm_and_si128( oligo, mask8));
+ oligo = nexthigh_rev >> 16; /* For 31..24 */
+ oligo |= low_rev << 16;
- return;
-}
-#endif
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK9; /* 31 */
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+ masked = (oligo >> 2) & MASK9; /* 30 */
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
-static int
-store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
- Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
- Genomecomp_T masked, oligo;
+ masked = (oligo >> 4) & MASK9; /* 29 */
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = high_rev >> 16; /* 0, No mask necessary */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
-
- masked = (high_rev >> 14) & MASK8; /* 1 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 1;
- }
- }
+ masked = (oligo >> 6) & MASK9; /* 28 */
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 12) & MASK8; /* 2 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 2;
- }
- }
+ masked = (oligo >> 8) & MASK9; /* 27 */
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 10) & MASK8; /* 3 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 3;
- }
- }
+ masked = (oligo >> 10) & MASK9; /* 26 */
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 8) & MASK8; /* 4 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 4;
- }
- }
+ masked = (oligo >> 12) & MASK9; /* 25 */
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 6) & MASK8; /* 5 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 5;
- }
- }
+ masked = (oligo >> 14) & MASK9; /* 24 */
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 4) & MASK8; /* 6 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 6;
- }
- }
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
- masked = (high_rev >> 2) & MASK8; /* 7 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 7;
- }
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
- masked = high_rev & MASK8; /* 8 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 8;
- }
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
- oligo = low_rev >> 18; /* For 9..15 */
- oligo |= high_rev << 14;
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 12) & MASK8; /* 9 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 9;
- }
- }
- masked = (oligo >> 10) & MASK8; /* 10 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 10;
- }
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
- masked = (oligo >> 8) & MASK8; /* 11 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 11;
- }
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 6) & MASK8; /* 12 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 12;
- }
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 4) & MASK8; /* 13 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 13;
- }
- }
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 2) & MASK8; /* 14 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 14;
- }
- }
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+#endif
- masked = oligo & MASK8; /* 15 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 15;
- }
- }
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rev & MASK9; /* 23 */
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = low_rev >> 16; /* 16, No mask necessary */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 16;
- }
- }
-
- masked = (low_rev >> 14) & MASK8; /* 17 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 17;
- }
- }
+ masked = (low_rev >> 2) & MASK9; /* 22 */
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 12) & MASK8; /* 18 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 18;
- }
- }
+ masked = (low_rev >> 4) & MASK9; /* 21 */
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 10) & MASK8; /* 19 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 19;
- }
- }
+ masked = (low_rev >> 6) & MASK9; /* 20 */
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 8) & MASK8; /* 20 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 20;
- }
- }
+ masked = (low_rev >> 8) & MASK9; /* 19 */
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 6) & MASK8; /* 21 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 21;
- }
- }
+ masked = (low_rev >> 10) & MASK9; /* 18 */
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 4) & MASK8; /* 22 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 22;
- }
- }
+ masked = (low_rev >> 12) & MASK9; /* 17 */
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 2) & MASK8; /* 23 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 23;
- }
- }
-
- masked = low_rev & MASK8; /* 24 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 24;
- }
- }
+ masked = low_rev >> 14; /* 16, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+#else
+ _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
- oligo = nexthigh_rev >> 18; /* For 25..31 */
- oligo |= low_rev << 14;
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 12) & MASK8; /* 25 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 25;
- }
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 10) & MASK8; /* 26 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 26;
- }
- }
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 8) & MASK8; /* 27 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 27;
- }
- }
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 6) & MASK8; /* 28 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 28;
- }
- }
- masked = (oligo >> 4) & MASK8; /* 29 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 29;
- }
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
- masked = (oligo >> 2) & MASK8; /* 30 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 30;
- }
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = oligo & MASK8; /* 31 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 31;
- }
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
- return chrpos + 32;
-}
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+#endif
-static void
-count_7mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
- Genomecomp_T masked, oligo;
+ oligo = low_rev >> 16; /* For 15..8 */
+ oligo |= high_rev << 16;
- masked = high_rev >> 18; /* 0, No mask necessary */
- counts[masked] += 1;
- debug(printf("0 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 16) & MASK7; /* 1 */
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK9; /* 15 */
counts[masked] += 1;
- debug(printf("1 %04X => %d\n",masked,counts[masked]));
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 14) & MASK7; /* 2 */
+ masked = (oligo >> 2) & MASK9; /* 14 */
counts[masked] += 1;
- debug(printf("2 %04X => %d\n",masked,counts[masked]));
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 12) & MASK7; /* 3 */
+ masked = (oligo >> 4) & MASK9; /* 13 */
counts[masked] += 1;
- debug(printf("3 %04X => %d\n",masked,counts[masked]));
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 10) & MASK7; /* 4 */
+ masked = (oligo >> 6) & MASK9; /* 12 */
counts[masked] += 1;
- debug(printf("4 %04X => %d\n",masked,counts[masked]));
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 8) & MASK7; /* 5 */
+ masked = (oligo >> 8) & MASK9; /* 11 */
counts[masked] += 1;
- debug(printf("5 %04X => %d\n",masked,counts[masked]));
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 6) & MASK7; /* 6 */
+ masked = (oligo >> 10) & MASK9; /* 10 */
counts[masked] += 1;
- debug(printf("6 %04X => %d\n",masked,counts[masked]));
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 4) & MASK7; /* 7 */
+ masked = (oligo >> 12) & MASK9; /* 9 */
counts[masked] += 1;
- debug(printf("7 %04X => %d\n",masked,counts[masked]));
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 2) & MASK7; /* 8 */
+ masked = (oligo >> 14) & MASK9; /* 8 */
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
- masked = high_rev & MASK7; /* 9 */
- counts[masked] += 1;
- debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
- oligo = low_rev >> 20; /* For 10..15 */
- oligo |= high_rev << 12;
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 10) & MASK7; /* 10 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("10 %04X => %d\n",masked,counts[masked]));
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 8) & MASK7; /* 11 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("11 %04X => %d\n",masked,counts[masked]));
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 6) & MASK7; /* 12 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 4) & MASK7; /* 13 */
- counts[masked] += 1;
- debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 2) & MASK7; /* 14 */
- counts[masked] += 1;
- debug(printf("14 %04X => %d\n",masked,counts[masked]));
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
- masked = oligo & MASK7; /* 15 */
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("15 %04X => %d\n",masked,counts[masked]));
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = low_rev >> 18; /* 16, No mask necessary */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 16) & MASK7; /* 17 */
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("17 %04X => %d\n",masked,counts[masked]));
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+#endif
- masked = (low_rev >> 14) & MASK7; /* 18 */
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rev & MASK9; /* 7 */
counts[masked] += 1;
- debug(printf("18 %04X => %d\n",masked,counts[masked]));
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 12) & MASK7; /* 19 */
+ masked = (high_rev >> 2) & MASK9; /* 6 */
counts[masked] += 1;
- debug(printf("19 %04X => %d\n",masked,counts[masked]));
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 10) & MASK7; /* 20 */
+ masked = (high_rev >> 4) & MASK9; /* 5 */
counts[masked] += 1;
- debug(printf("20 %04X => %d\n",masked,counts[masked]));
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 8) & MASK7; /* 21 */
+ masked = (high_rev >> 6) & MASK9; /* 4 */
counts[masked] += 1;
- debug(printf("21 %04X => %d\n",masked,counts[masked]));
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 6) & MASK7; /* 22 */
+ masked = (high_rev >> 8) & MASK9; /* 3 */
counts[masked] += 1;
- debug(printf("22 %04X => %d\n",masked,counts[masked]));
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 4) & MASK7; /* 23 */
+ masked = (high_rev >> 10) & MASK9; /* 2 */
counts[masked] += 1;
- debug(printf("23 %04X => %d\n",masked,counts[masked]));
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 2) & MASK7; /* 24 */
+ masked = (high_rev >> 12) & MASK9; /* 1 */
counts[masked] += 1;
- debug(printf("24 %04X => %d\n",masked,counts[masked]));
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = low_rev & MASK7; /* 25 */
+ masked = high_rev >> 14; /* 0, No mask necessary */
counts[masked] += 1;
- debug(printf("25 %04X => %d\n",masked,counts[masked]));
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+#else
+ _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
- oligo = nexthigh_rev >> 20; /* For 26..31 */
- oligo |= low_rev << 12;
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 10) & MASK7; /* 26 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("26 %04X => %d\n",masked,counts[masked]));
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 8) & MASK7; /* 27 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("27 %04X => %d\n",masked,counts[masked]));
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 6) & MASK7; /* 28 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("28 %04X => %d\n",masked,counts[masked]));
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 4) & MASK7; /* 29 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("29 %04X => %d\n",masked,counts[masked]));
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 2) & MASK7; /* 30 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("30 %04X => %d\n",masked,counts[masked]));
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = oligo & MASK7; /* 31 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("31 %04X => %d\n",masked,counts[masked]));
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+#endif
return;
}
+
/* Expecting current to have {high0_rev, low0_rev, high1_rev,
low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and
high2_rev} */
#ifdef USE_SIMD_FOR_COUNTS
static void
-extract_7mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
+extract_9mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
__m128i oligo;
- _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary */
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
- _mm_store_si128(out++, _mm_and_si128( current, mask7));
+ _mm_store_si128(out++, _mm_srli_epi32(current,14)); /* No mask necessary */
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask9));
+ _mm_store_si128(out++, _mm_and_si128( current, mask9));
+
+ oligo = _mm_or_si128( _mm_srli_epi32(next,16), _mm_slli_epi32(current,16));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9));
+ _mm_store_si128(out++, _mm_and_si128( oligo, mask9));
- oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
- _mm_store_si128(out++, _mm_and_si128( oligo, mask7));
+ return;
+}
+
+static void
+count_9mers_fwd_simd (Count_T *counts, __m128i current, __m128i next) {
+ __m128i oligo;
+ Genomecomp_T array[4];
+
+ _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,14)); /* No mask necessary */
+ counts[array[0]] += 1; /* 0 */
+ counts[array[1]] += 1; /* 16 */
+ counts[array[2]] += 1; /* 32 */
+ counts[array[3]] += 1; /* 48 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask9));
+ counts[array[0]] += 1; /* 1 */
+ counts[array[1]] += 1; /* 17 */
+ counts[array[2]] += 1; /* 33 */
+ counts[array[3]] += 1; /* 49 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask9));
+ counts[array[0]] += 1; /* 2 */
+ counts[array[1]] += 1; /* 18 */
+ counts[array[2]] += 1; /* 34 */
+ counts[array[3]] += 1; /* 50 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask9));
+ counts[array[0]] += 1; /* 3 */
+ counts[array[1]] += 1; /* 19 */
+ counts[array[2]] += 1; /* 35 */
+ counts[array[3]] += 1; /* 51 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask9));
+ counts[array[0]] += 1; /* 4 */
+ counts[array[1]] += 1; /* 20 */
+ counts[array[2]] += 1; /* 36 */
+ counts[array[3]] += 1; /* 52 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask9));
+ counts[array[0]] += 1; /* 5 */
+ counts[array[1]] += 1; /* 21 */
+ counts[array[2]] += 1; /* 37 */
+ counts[array[3]] += 1; /* 53 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask9));
+ counts[array[0]] += 1; /* 6 */
+ counts[array[1]] += 1; /* 22 */
+ counts[array[2]] += 1; /* 38 */
+ counts[array[3]] += 1; /* 54 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask9));
+ counts[array[0]] += 1; /* 7 */
+ counts[array[1]] += 1; /* 23 */
+ counts[array[2]] += 1; /* 39 */
+ counts[array[3]] += 1; /* 55 */
+
+ oligo = _mm_or_si128( _mm_srli_epi32(next,16), _mm_slli_epi32(current,16));
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9));
+ counts[array[0]] += 1; /* 8 */
+ counts[array[1]] += 1; /* 24 */
+ counts[array[2]] += 1; /* 40 */
+ counts[array[3]] += 1; /* 56 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9));
+ counts[array[0]] += 1; /* 9 */
+ counts[array[1]] += 1; /* 25 */
+ counts[array[2]] += 1; /* 41 */
+ counts[array[3]] += 1; /* 57 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9));
+ counts[array[0]] += 1; /* 10 */
+ counts[array[1]] += 1; /* 26 */
+ counts[array[2]] += 1; /* 42 */
+ counts[array[3]] += 1; /* 58 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9));
+ counts[array[0]] += 1; /* 11 */
+ counts[array[1]] += 1; /* 27 */
+ counts[array[2]] += 1; /* 43 */
+ counts[array[3]] += 1; /* 59 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9));
+ counts[array[0]] += 1; /* 12 */
+ counts[array[1]] += 1; /* 28 */
+ counts[array[2]] += 1; /* 44 */
+ counts[array[3]] += 1; /* 60 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9));
+ counts[array[0]] += 1; /* 13 */
+ counts[array[1]] += 1; /* 29 */
+ counts[array[2]] += 1; /* 45 */
+ counts[array[3]] += 1; /* 61 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9));
+ counts[array[0]] += 1; /* 14 */
+ counts[array[1]] += 1; /* 30 */
+ counts[array[2]] += 1; /* 46 */
+ counts[array[3]] += 1; /* 62 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask9));
+ counts[array[0]] += 1; /* 15 */
+ counts[array[1]] += 1; /* 31 */
+ counts[array[2]] += 1; /* 47 */
+ counts[array[3]] += 1; /* 63 */
return;
}
@@ -10558,4469 +10662,12760 @@ extract_7mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
static int
-store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
- masked = high_rev >> 18; /* 0, No mask necessary */
+
+ oligo = nexthigh_rev >> 16; /* For 31..24 */
+ oligo |= low_rev << 16;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK9; /* 31 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
-
- masked = (high_rev >> 16) & MASK7; /* 1 */
+
+ masked = (oligo >> 2) & MASK9; /* 30 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 1;
+ *(--pointers[masked]) = chrpos - 1;
}
}
- masked = (high_rev >> 14) & MASK7; /* 2 */
+ masked = (oligo >> 4) & MASK9; /* 29 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 2;
+ *(--pointers[masked]) = chrpos - 2;
}
}
- masked = (high_rev >> 12) & MASK7; /* 3 */
+ masked = (oligo >> 6) & MASK9; /* 28 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 3;
+ *(--pointers[masked]) = chrpos - 3;
}
}
- masked = (high_rev >> 10) & MASK7; /* 4 */
+ masked = (oligo >> 8) & MASK9; /* 27 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 4;
+ *(--pointers[masked]) = chrpos - 4;
}
}
- masked = (high_rev >> 8) & MASK7; /* 5 */
+ masked = (oligo >> 10) & MASK9; /* 26 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 5;
+ *(--pointers[masked]) = chrpos - 5;
}
}
- masked = (high_rev >> 6) & MASK7; /* 6 */
+ masked = (oligo >> 12) & MASK9; /* 25 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 6;
+ *(--pointers[masked]) = chrpos - 6;
}
}
- masked = (high_rev >> 4) & MASK7; /* 7 */
+ masked = (oligo >> 14) & MASK9; /* 24 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 7;
+ *(--pointers[masked]) = chrpos - 7;
}
}
- masked = (high_rev >> 2) & MASK7; /* 8 */
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 8;
+ *(--pointers[masked]) = chrpos;
}
}
- masked = high_rev & MASK7; /* 9 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 9;
+ *(--pointers[masked]) = chrpos - 1;
}
}
-
- oligo = low_rev >> 20; /* For 10..15 */
- oligo |= high_rev << 12;
-
- masked = (oligo >> 10) & MASK7; /* 10 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 10;
+ *(--pointers[masked]) = chrpos - 2;
}
}
- masked = (oligo >> 8) & MASK7; /* 11 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 11;
+ *(--pointers[masked]) = chrpos - 3;
}
}
- masked = (oligo >> 6) & MASK7; /* 12 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 12;
+ *(--pointers[masked]) = chrpos - 4;
}
}
- masked = (oligo >> 4) & MASK7; /* 13 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 13;
+ *(--pointers[masked]) = chrpos - 5;
}
}
- masked = (oligo >> 2) & MASK7; /* 14 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 14;
+ *(--pointers[masked]) = chrpos - 6;
}
}
- masked = oligo & MASK7; /* 15 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 15;
+ *(--pointers[masked]) = chrpos - 7;
}
}
+#endif
- masked = low_rev >> 18; /* 16, No mask necessary */
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rev & MASK9; /* 23 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 16;
+ *(--pointers[masked]) = chrpos - 8;
}
}
-
- masked = (low_rev >> 16) & MASK7; /* 17 */
+
+ masked = (low_rev >> 2) & MASK9; /* 22 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 17;
+ *(--pointers[masked]) = chrpos - 9;
}
}
- masked = (low_rev >> 14) & MASK7; /* 18 */
+ masked = (low_rev >> 4) & MASK9; /* 21 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 18;
+ *(--pointers[masked]) = chrpos - 10;
}
}
- masked = (low_rev >> 12) & MASK7; /* 19 */
+ masked = (low_rev >> 6) & MASK9; /* 20 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 19;
+ *(--pointers[masked]) = chrpos - 11;
}
}
- masked = (low_rev >> 10) & MASK7; /* 20 */
+ masked = (low_rev >> 8) & MASK9; /* 19 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 20;
+ *(--pointers[masked]) = chrpos - 12;
}
}
- masked = (low_rev >> 8) & MASK7; /* 21 */
+ masked = (low_rev >> 10) & MASK9; /* 18 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 21;
+ *(--pointers[masked]) = chrpos - 13;
}
}
- masked = (low_rev >> 6) & MASK7; /* 22 */
+ masked = (low_rev >> 12) & MASK9; /* 17 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 22;
+ *(--pointers[masked]) = chrpos - 14;
}
}
- masked = (low_rev >> 4) & MASK7; /* 23 */
+ masked = low_rev >> 14; /* 16, No mask necessary */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 23;
+ *(--pointers[masked]) = chrpos - 15;
}
}
- masked = (low_rev >> 2) & MASK7; /* 24 */
+#else
+ _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 24;
+ *(--pointers[masked]) = chrpos - 8;
}
}
- masked = low_rev & MASK7; /* 25 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 25;
+ *(--pointers[masked]) = chrpos - 9;
}
}
-
- oligo = nexthigh_rev >> 20; /* For 26..31 */
- oligo |= low_rev << 12;
-
- masked = (oligo >> 10) & MASK7; /* 26 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 26;
+ *(--pointers[masked]) = chrpos - 10;
}
}
- masked = (oligo >> 8) & MASK7; /* 27 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 27;
+ *(--pointers[masked]) = chrpos - 11;
}
}
- masked = (oligo >> 6) & MASK7; /* 28 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 28;
+ *(--pointers[masked]) = chrpos - 12;
}
}
- masked = (oligo >> 4) & MASK7; /* 29 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 29;
+ *(--pointers[masked]) = chrpos - 13;
}
}
- masked = (oligo >> 2) & MASK7; /* 30 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 30;
+ *(--pointers[masked]) = chrpos - 14;
}
}
- masked = oligo & MASK7; /* 31 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 31;
+ *(--pointers[masked]) = chrpos - 15;
}
}
-
- return chrpos + 32;
-}
-
-
-static void
-count_6mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
- Genomecomp_T masked, oligo;
-
- masked = high_rev >> 20; /* 0, No mask necessary */
- counts[masked] += 1;
- debug(printf("0 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 18) & MASK6; /* 1 */
- counts[masked] += 1;
- debug(printf("1 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 16) & MASK6; /* 2 */
- counts[masked] += 1;
- debug(printf("2 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 14) & MASK6; /* 3 */
- counts[masked] += 1;
- debug(printf("3 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 12) & MASK6; /* 4 */
- counts[masked] += 1;
- debug(printf("4 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 10) & MASK6; /* 5 */
- counts[masked] += 1;
- debug(printf("5 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 8) & MASK6; /* 6 */
- counts[masked] += 1;
- debug(printf("6 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 6) & MASK6; /* 7 */
- counts[masked] += 1;
- debug(printf("7 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 4) & MASK6; /* 8 */
- counts[masked] += 1;
- debug(printf("8 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 2) & MASK6; /* 9 */
- counts[masked] += 1;
- debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
- masked = high_rev & MASK6; /* 10 */
- counts[masked] += 1;
- debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
-
- oligo = low_rev >> 22; /* For 11..15 */
- oligo |= high_rev << 10;
-
- masked = (oligo >> 8) & MASK6; /* 11 */
- counts[masked] += 1;
- debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 6) & MASK6; /* 12 */
- counts[masked] += 1;
- debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 4) & MASK6; /* 13 */
- counts[masked] += 1;
- debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 2) & MASK6; /* 14 */
- counts[masked] += 1;
- debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
- masked = oligo & MASK6; /* 15 */
- counts[masked] += 1;
- debug(printf("15 %04X => %d\n",masked,counts[masked]));
-
-
- masked = low_rev >> 20; /* 16, No mask necessary */
- counts[masked] += 1;
- debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 18) & MASK6; /* 17 */
- counts[masked] += 1;
- debug(printf("17 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 16) & MASK6; /* 18 */
- counts[masked] += 1;
- debug(printf("18 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 14) & MASK6; /* 19 */
- counts[masked] += 1;
- debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 12) & MASK6; /* 20 */
- counts[masked] += 1;
- debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 10) & MASK6; /* 21 */
- counts[masked] += 1;
- debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 8) & MASK6; /* 22 */
- counts[masked] += 1;
- debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 6) & MASK6; /* 23 */
- counts[masked] += 1;
- debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 4) & MASK6; /* 24 */
- counts[masked] += 1;
- debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 2) & MASK6; /* 25 */
- counts[masked] += 1;
- debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
- masked = low_rev & MASK6; /* 26 */
- counts[masked] += 1;
- debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-
- oligo = nexthigh_rev >> 22; /* For 27..31 */
- oligo |= low_rev << 10;
-
- masked = (oligo >> 8) & MASK6; /* 27 */
- counts[masked] += 1;
- debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 6) & MASK6; /* 28 */
- counts[masked] += 1;
- debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 4) & MASK6; /* 29 */
- counts[masked] += 1;
- debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 2) & MASK6; /* 30 */
- counts[masked] += 1;
- debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
- masked = oligo & MASK6; /* 31 */
- counts[masked] += 1;
- debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
- return;
-}
-
-
-/* Expecting current to have {high0_rev, low0_rev, high1_rev,
- low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and
- high2_rev} */
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_6mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
- __m128i oligo;
-
- _mm_store_si128(out++, _mm_srli_epi32(current,20)); /* No mask necessary */;
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask6));
- _mm_store_si128(out++, _mm_and_si128( current, mask6));
-
- oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
- _mm_store_si128(out++, _mm_and_si128( oligo, mask6));
-
- return;
-}
#endif
-static int
-store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
- Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
- Genomecomp_T masked, oligo;
+ oligo = low_rev >> 16; /* For 15..8 */
+ oligo |= high_rev << 16;
- masked = high_rev >> 20; /* 0, No mask necessary */
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK9; /* 15 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos - 16;
}
}
-
- masked = (high_rev >> 18) & MASK6; /* 1 */
+
+ masked = (oligo >> 2) & MASK9; /* 14 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 1;
+ *(--pointers[masked]) = chrpos - 17;
}
}
- masked = (high_rev >> 16) & MASK6; /* 2 */
+ masked = (oligo >> 4) & MASK9; /* 13 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 2;
+ *(--pointers[masked]) = chrpos - 18;
}
}
- masked = (high_rev >> 14) & MASK6; /* 3 */
+ masked = (oligo >> 6) & MASK9; /* 12 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 3;
+ *(--pointers[masked]) = chrpos - 19;
}
}
- masked = (high_rev >> 12) & MASK6; /* 4 */
+ masked = (oligo >> 8) & MASK9; /* 11 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 4;
+ *(--pointers[masked]) = chrpos - 20;
}
}
- masked = (high_rev >> 10) & MASK6; /* 5 */
+ masked = (oligo >> 10) & MASK9; /* 10 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 5;
+ *(--pointers[masked]) = chrpos - 21;
}
}
- masked = (high_rev >> 8) & MASK6; /* 6 */
+ masked = (oligo >> 12) & MASK9; /* 9 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 6;
+ *(--pointers[masked]) = chrpos - 22;
}
}
- masked = (high_rev >> 6) & MASK6; /* 7 */
+ masked = (oligo >> 14) & MASK9; /* 8 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 7;
+ *(--pointers[masked]) = chrpos - 23;
}
}
- masked = (high_rev >> 4) & MASK6; /* 8 */
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 8;
+ *(--pointers[masked]) = chrpos - 16;
}
}
- masked = (high_rev >> 2) & MASK6; /* 9 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 9;
+ *(--pointers[masked]) = chrpos - 17;
}
}
- masked = high_rev & MASK6; /* 10 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 10;
+ *(--pointers[masked]) = chrpos - 18;
}
}
-
- oligo = low_rev >> 22; /* For 11..15 */
- oligo |= high_rev << 10;
-
- masked = (oligo >> 8) & MASK6; /* 11 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 11;
+ *(--pointers[masked]) = chrpos - 19;
}
}
- masked = (oligo >> 6) & MASK6; /* 12 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 12;
+ *(--pointers[masked]) = chrpos - 20;
}
}
- masked = (oligo >> 4) & MASK6; /* 13 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 13;
+ *(--pointers[masked]) = chrpos - 21;
}
}
- masked = (oligo >> 2) & MASK6; /* 14 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 14;
+ *(--pointers[masked]) = chrpos - 22;
}
}
- masked = oligo & MASK6; /* 15 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 15;
+ *(--pointers[masked]) = chrpos - 23;
}
}
+#endif
- masked = low_rev >> 20; /* 16, No mask necessary */
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rev & MASK9; /* 7 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 16;
+ *(--pointers[masked]) = chrpos - 24;
}
}
-
- masked = (low_rev >> 18) & MASK6; /* 17 */
+
+ masked = (high_rev >> 2) & MASK9; /* 6 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 17;
+ *(--pointers[masked]) = chrpos - 25;
}
}
- masked = (low_rev >> 16) & MASK6; /* 18 */
+ masked = (high_rev >> 4) & MASK9; /* 5 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 18;
+ *(--pointers[masked]) = chrpos - 26;
}
}
- masked = (low_rev >> 14) & MASK6; /* 19 */
+ masked = (high_rev >> 6) & MASK9; /* 4 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 19;
+ *(--pointers[masked]) = chrpos - 27;
}
}
- masked = (low_rev >> 12) & MASK6; /* 20 */
+ masked = (high_rev >> 8) & MASK9; /* 3 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 20;
+ *(--pointers[masked]) = chrpos - 28;
}
}
- masked = (low_rev >> 10) & MASK6; /* 21 */
+ masked = (high_rev >> 10) & MASK9; /* 2 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 21;
+ *(--pointers[masked]) = chrpos - 29;
}
}
- masked = (low_rev >> 8) & MASK6; /* 22 */
+ masked = (high_rev >> 12) & MASK9; /* 1 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 22;
+ *(--pointers[masked]) = chrpos - 30;
}
}
- masked = (low_rev >> 6) & MASK6; /* 23 */
+ masked = high_rev >> 14; /* 0, No mask necessary */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 23;
+ *(--pointers[masked]) = chrpos - 31;
}
}
- masked = (low_rev >> 4) & MASK6; /* 24 */
+#else
+ _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 24;
+ *(--pointers[masked]) = chrpos - 24;
}
}
- masked = (low_rev >> 2) & MASK6; /* 25 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 25;
+ *(--pointers[masked]) = chrpos - 25;
}
}
- masked = low_rev & MASK6; /* 26 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 26;
+ *(--pointers[masked]) = chrpos - 26;
}
}
-
- oligo = nexthigh_rev >> 22; /* For 27..31 */
- oligo |= low_rev << 10;
-
- masked = (oligo >> 8) & MASK6; /* 27 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 27;
+ *(--pointers[masked]) = chrpos - 27;
}
}
- masked = (oligo >> 6) & MASK6; /* 28 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 28;
+ *(--pointers[masked]) = chrpos - 28;
}
}
- masked = (oligo >> 4) & MASK6; /* 29 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 29;
+ *(--pointers[masked]) = chrpos - 29;
}
}
- masked = (oligo >> 2) & MASK6; /* 30 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 30;
+ *(--pointers[masked]) = chrpos - 30;
}
}
- masked = oligo & MASK6; /* 31 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 31;
+ *(--pointers[masked]) = chrpos - 31;
}
}
+#endif
- return chrpos + 32;
+ return chrpos - 32;
}
+
static void
-count_5mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+count_8mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
- masked = high_rev >> 22; /* 0, No mask necessary */
- counts[masked] += 1;
- debug(printf("0 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rev >> 20) & MASK5; /* 1 */
- counts[masked] += 1;
- debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 18) & MASK5; /* 2 */
- counts[masked] += 1;
- debug(printf("2 %04X => %d\n",masked,counts[masked]));
+ oligo = nexthigh_rev >> 18; /* For 31..25 */
+ oligo |= low_rev << 14;
- masked = (high_rev >> 16) & MASK5; /* 3 */
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK8; /* 31 */
counts[masked] += 1;
- debug(printf("3 %04X => %d\n",masked,counts[masked]));
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 14) & MASK5; /* 4 */
+ masked = (oligo >> 2) & MASK8; /* 30 */
counts[masked] += 1;
- debug(printf("4 %04X => %d\n",masked,counts[masked]));
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 12) & MASK5; /* 5 */
+ masked = (oligo >> 4) & MASK8; /* 29 */
counts[masked] += 1;
- debug(printf("5 %04X => %d\n",masked,counts[masked]));
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 10) & MASK5; /* 6 */
+ masked = (oligo >> 6) & MASK8; /* 28 */
counts[masked] += 1;
- debug(printf("6 %04X => %d\n",masked,counts[masked]));
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 8) & MASK5; /* 7 */
+ masked = (oligo >> 8) & MASK8; /* 27 */
counts[masked] += 1;
- debug(printf("7 %04X => %d\n",masked,counts[masked]));
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 6) & MASK5; /* 8 */
+ masked = (oligo >> 10) & MASK8; /* 26 */
counts[masked] += 1;
- debug(printf("8 %04X => %d\n",masked,counts[masked]));
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 4) & MASK5; /* 9 */
+ masked = (oligo >> 12) & MASK8; /* 25 */
counts[masked] += 1;
- debug(printf("9 %04X => %d\n",masked,counts[masked]));
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 2) & MASK5; /* 10 */
- counts[masked] += 1;
- debug(printf("10 %04X => %d\n",masked,counts[masked]));
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
- masked = high_rev & MASK5; /* 11 */
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("11 %04X => %d\n",masked,counts[masked]));
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
- oligo = low_rev >> 24; /* For 12..15 */
- oligo |= high_rev << 8;
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 6) & MASK5; /* 12 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("12 %04X => %d\n",masked,counts[masked]));
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 4) & MASK5; /* 13 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("13 %04X => %d\n",masked,counts[masked]));
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 2) & MASK5; /* 14 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("14 %04X => %d\n",masked,counts[masked]));
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = oligo & MASK5; /* 15 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("15 %04X => %d\n",masked,counts[masked]));
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+#endif
- masked = low_rev >> 22; /* 16, No mask necessary */
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rev & MASK8; /* 24 */
counts[masked] += 1;
- debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rev >> 20) & MASK5; /* 17 */
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 2) & MASK8; /* 23 */
counts[masked] += 1;
- debug(printf("17 %04X => %d\n",masked,counts[masked]));
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 18) & MASK5; /* 18 */
+ masked = (low_rev >> 4) & MASK8; /* 22 */
counts[masked] += 1;
- debug(printf("18 %04X => %d\n",masked,counts[masked]));
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 16) & MASK5; /* 19 */
+ masked = (low_rev >> 6) & MASK8; /* 21 */
counts[masked] += 1;
- debug(printf("19 %04X => %d\n",masked,counts[masked]));
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 14) & MASK5; /* 20 */
+ masked = (low_rev >> 8) & MASK8; /* 20 */
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 12) & MASK5; /* 21 */
+ masked = (low_rev >> 10) & MASK8; /* 19 */
counts[masked] += 1;
- debug(printf("21 %04X => %d\n",masked,counts[masked]));
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 10) & MASK5; /* 22 */
+ masked = (low_rev >> 12) & MASK8; /* 18 */
counts[masked] += 1;
- debug(printf("22 %04X => %d\n",masked,counts[masked]));
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 8) & MASK5; /* 23 */
+ masked = (low_rev >> 14) & MASK8; /* 17 */
counts[masked] += 1;
- debug(printf("23 %04X => %d\n",masked,counts[masked]));
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 6) & MASK5; /* 24 */
+ masked = low_rev >> 16; /* 16, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 4) & MASK5; /* 25 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("25 %04X => %d\n",masked,counts[masked]));
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = (low_rev >> 2) & MASK5; /* 26 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("26 %04X => %d\n",masked,counts[masked]));
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = low_rev & MASK5; /* 27 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("27 %04X => %d\n",masked,counts[masked]));
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
- oligo = nexthigh_rev >> 24; /* For 28..31 */
- oligo |= low_rev << 8;
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
- masked = (oligo >> 6) & MASK5; /* 28 */
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("28 %04X => %d\n",masked,counts[masked]));
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 4) & MASK5; /* 29 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("29 %04X => %d\n",masked,counts[masked]));
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 2) & MASK5; /* 30 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("30 %04X => %d\n",masked,counts[masked]));
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = oligo & MASK5; /* 31 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("31 %04X => %d\n",masked,counts[masked]));
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
- return;
-}
+ masked = low_rev >> 16; /* 16, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+#endif
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_5mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
- __m128i oligo;
- _mm_store_si128(out++, _mm_srli_epi32(current,22)); /* No mask necessary */
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
- _mm_store_si128(out++, _mm_and_si128( current, mask5));
+ oligo = low_rev >> 18; /* For 15..9 */
+ oligo |= high_rev << 14;
- oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
- _mm_store_si128(out++, _mm_and_si128( oligo, mask5));
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK8; /* 15 */
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
- return;
-}
-#endif
+ masked = (oligo >> 2) & MASK8; /* 14 */
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+ masked = (oligo >> 4) & MASK8; /* 13 */
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
-static int
-store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
- Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
- Genomecomp_T masked, oligo;
+ masked = (oligo >> 6) & MASK8; /* 12 */
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = high_rev >> 22; /* 0, No mask necessary */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
-
- masked = (high_rev >> 20) & MASK5; /* 1 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 1;
- }
- }
+ masked = (oligo >> 8) & MASK8; /* 11 */
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 18) & MASK5; /* 2 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 2;
- }
- }
+ masked = (oligo >> 10) & MASK8; /* 10 */
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 16) & MASK5; /* 3 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 3;
- }
- }
+ masked = (oligo >> 12) & MASK8; /* 9 */
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 14) & MASK5; /* 4 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 4;
- }
- }
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
- masked = (high_rev >> 12) & MASK5; /* 5 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 5;
- }
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 10) & MASK5; /* 6 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 6;
- }
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 8) & MASK5; /* 7 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 7;
- }
- }
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 6) & MASK5; /* 8 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 8;
- }
- }
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = (high_rev >> 4) & MASK5; /* 9 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 9;
- }
- }
- masked = (high_rev >> 2) & MASK5; /* 10 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 10;
- }
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
- masked = high_rev & MASK5; /* 11 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 11;
- }
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
- oligo = low_rev >> 24; /* For 12..15 */
- oligo |= high_rev << 8;
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+#endif
- masked = (oligo >> 6) & MASK5; /* 12 */
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rev & MASK8; /* 8 */
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 2) & MASK8; /* 7 */
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 4) & MASK8; /* 6 */
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 6) & MASK8; /* 5 */
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 8) & MASK8; /* 4 */
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 10) & MASK8; /* 3 */
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 12) & MASK8; /* 2 */
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 14) & MASK8; /* 1 */
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = high_rev >> 16; /* 0, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+
+ masked = high_rev >> 16; /* 0, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+#endif
+
+ return;
+}
+
+/* Expecting current to have {high0_rev, low0_rev, high1_rev,
+ low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and
+ high2_rev} */
+#ifdef USE_SIMD_FOR_COUNTS
+static void
+extract_8mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
+ __m128i oligo;
+
+ _mm_store_si128(out++, _mm_srli_epi32(current,16)); /* No mask necessary */
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
+ _mm_store_si128(out++, _mm_and_si128( current, mask8));
+
+ oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
+ _mm_store_si128(out++, _mm_and_si128( oligo, mask8));
+
+ return;
+}
+
+static void
+count_8mers_fwd_simd (Count_T *counts, __m128i current, __m128i next) {
+ __m128i oligo;
+ Genomecomp_T array[4];
+
+ _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,16)); /* No mask necessary */
+ counts[array[0]] += 1; /* 0 */
+ counts[array[1]] += 1; /* 16 */
+ counts[array[2]] += 1; /* 32 */
+ counts[array[3]] += 1; /* 48 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
+ counts[array[0]] += 1; /* 1 */
+ counts[array[1]] += 1; /* 17 */
+ counts[array[2]] += 1; /* 33 */
+ counts[array[3]] += 1; /* 49 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
+ counts[array[0]] += 1; /* 2 */
+ counts[array[1]] += 1; /* 18 */
+ counts[array[2]] += 1; /* 34 */
+ counts[array[3]] += 1; /* 50 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
+ counts[array[0]] += 1; /* 3 */
+ counts[array[1]] += 1; /* 19 */
+ counts[array[2]] += 1; /* 35 */
+ counts[array[3]] += 1; /* 51 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
+ counts[array[0]] += 1; /* 4 */
+ counts[array[1]] += 1; /* 20 */
+ counts[array[2]] += 1; /* 36 */
+ counts[array[3]] += 1; /* 52 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
+ counts[array[0]] += 1; /* 5 */
+ counts[array[1]] += 1; /* 21 */
+ counts[array[2]] += 1; /* 37 */
+ counts[array[3]] += 1; /* 53 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
+ counts[array[0]] += 1; /* 6 */
+ counts[array[1]] += 1; /* 22 */
+ counts[array[2]] += 1; /* 38 */
+ counts[array[3]] += 1; /* 54 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
+ counts[array[0]] += 1; /* 7 */
+ counts[array[1]] += 1; /* 23 */
+ counts[array[2]] += 1; /* 39 */
+ counts[array[3]] += 1; /* 55 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask8));
+ counts[array[0]] += 1; /* 8 */
+ counts[array[1]] += 1; /* 24 */
+ counts[array[2]] += 1; /* 40 */
+ counts[array[3]] += 1; /* 56 */
+
+
+ oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14));
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
+ counts[array[0]] += 1; /* 9 */
+ counts[array[1]] += 1; /* 25 */
+ counts[array[2]] += 1; /* 41 */
+ counts[array[3]] += 1; /* 57 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
+ counts[array[0]] += 1; /* 10 */
+ counts[array[1]] += 1; /* 26 */
+ counts[array[2]] += 1; /* 42 */
+ counts[array[3]] += 1; /* 58 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
+ counts[array[0]] += 1; /* 11 */
+ counts[array[1]] += 1; /* 27 */
+ counts[array[2]] += 1; /* 43 */
+ counts[array[3]] += 1; /* 59 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
+ counts[array[0]] += 1; /* 12 */
+ counts[array[1]] += 1; /* 28 */
+ counts[array[2]] += 1; /* 44 */
+ counts[array[3]] += 1; /* 60 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
+ counts[array[0]] += 1; /* 13 */
+ counts[array[1]] += 1; /* 29 */
+ counts[array[2]] += 1; /* 45 */
+ counts[array[3]] += 1; /* 61 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
+ counts[array[0]] += 1; /* 14 */
+ counts[array[1]] += 1; /* 30 */
+ counts[array[2]] += 1; /* 46 */
+ counts[array[3]] += 1; /* 62 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask8));
+ counts[array[0]] += 1; /* 15 */
+ counts[array[1]] += 1; /* 31 */
+ counts[array[2]] += 1; /* 47 */
+ counts[array[3]] += 1; /* 63 */
+
+ return;
+}
+#endif
+
+
+static int
+store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+ Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
+
+
+ oligo = nexthigh_rev >> 18; /* For 31..25 */
+ oligo |= low_rev << 14;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK8; /* 31 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 12;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos));
+ *(--pointers[masked]) = chrpos;
}
}
- masked = (oligo >> 4) & MASK5; /* 13 */
+ masked = (oligo >> 2) & MASK8; /* 30 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 13;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+ *(--pointers[masked]) = chrpos - 1;
}
}
- masked = (oligo >> 2) & MASK5; /* 14 */
+ masked = (oligo >> 4) & MASK8; /* 29 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 14;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+ *(--pointers[masked]) = chrpos - 2;
}
}
- masked = oligo & MASK5; /* 15 */
+ masked = (oligo >> 6) & MASK8; /* 28 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 15;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+ *(--pointers[masked]) = chrpos - 3;
}
}
+ masked = (oligo >> 8) & MASK8; /* 27 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+ *(--pointers[masked]) = chrpos - 4;
+ }
+ }
- masked = low_rev >> 22; /* 16, No mask necessary */
+ masked = (oligo >> 10) & MASK8; /* 26 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 16;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+ *(--pointers[masked]) = chrpos - 5;
}
}
-
- masked = (low_rev >> 20) & MASK5; /* 17 */
+
+ masked = (oligo >> 12) & MASK8; /* 25 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 17;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+ *(--pointers[masked]) = chrpos - 6;
}
}
- masked = (low_rev >> 18) & MASK5; /* 18 */
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 18;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos));
+ *(--pointers[masked]) = chrpos;
}
}
- masked = (low_rev >> 16) & MASK5; /* 19 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 19;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+ *(--pointers[masked]) = chrpos - 1;
}
}
- masked = (low_rev >> 14) & MASK5; /* 20 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 20;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+ *(--pointers[masked]) = chrpos - 2;
}
}
- masked = (low_rev >> 12) & MASK5; /* 21 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 21;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+ *(--pointers[masked]) = chrpos - 3;
}
}
- masked = (low_rev >> 10) & MASK5; /* 22 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 22;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+ *(--pointers[masked]) = chrpos - 4;
}
}
- masked = (low_rev >> 8) & MASK5; /* 23 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 23;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+ *(--pointers[masked]) = chrpos - 5;
}
}
- masked = (low_rev >> 6) & MASK5; /* 24 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 24;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+ *(--pointers[masked]) = chrpos - 6;
}
}
+#endif
- masked = (low_rev >> 4) & MASK5; /* 25 */
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rev & MASK8; /* 24 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 25;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+ *(--pointers[masked]) = chrpos - 7;
}
}
- masked = (low_rev >> 2) & MASK5; /* 26 */
+ masked = (low_rev >> 2) & MASK8; /* 23 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 26;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+ *(--pointers[masked]) = chrpos - 8;
}
}
- masked = low_rev & MASK5; /* 27 */
+ masked = (low_rev >> 4) & MASK8; /* 22 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 27;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+ *(--pointers[masked]) = chrpos - 9;
}
}
+ masked = (low_rev >> 6) & MASK8; /* 21 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+ *(--pointers[masked]) = chrpos - 10;
+ }
+ }
- oligo = nexthigh_rev >> 24; /* For 28..31 */
- oligo |= low_rev << 8;
+ masked = (low_rev >> 8) & MASK8; /* 20 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+ *(--pointers[masked]) = chrpos - 11;
+ }
+ }
- masked = (oligo >> 6) & MASK5; /* 28 */
+ masked = (low_rev >> 10) & MASK8; /* 19 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 28;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+ *(--pointers[masked]) = chrpos - 12;
}
}
- masked = (oligo >> 4) & MASK5; /* 29 */
+ masked = (low_rev >> 12) & MASK8; /* 18 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 29;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+ *(--pointers[masked]) = chrpos - 13;
}
}
- masked = (oligo >> 2) & MASK5; /* 30 */
+ masked = (low_rev >> 14) & MASK8; /* 17 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 30;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+ *(--pointers[masked]) = chrpos - 14;
}
}
- masked = oligo & MASK5; /* 31 */
+ masked = low_rev >> 16; /* 16, No mask necessary */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 31;
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+ *(--pointers[masked]) = chrpos - 15;
}
}
- return chrpos + 32;
-}
+#else
+ _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+ *(--pointers[masked]) = chrpos - 7;
+ }
+ }
-#if (!defined(USE_SIMD_FOR_COUNTS) || defined(DEBUG14))
-static void
-count_positions_fwd_std (Count_T *counts, int indexsize, Univcoord_T left, Univcoord_T left_plus_length,
- int genestrand) {
- int startdiscard, enddiscard;
- Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev,
- low, high, nextlow;
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+ *(--pointers[masked]) = chrpos - 8;
+ }
+ }
- debug(printf("Starting count_positions_fwd_std\n"));
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+ *(--pointers[masked]) = chrpos - 9;
+ }
+ }
- left_plus_length -= indexsize;
-#if 0
- /* No. Extends past end. */
- left_plus_length += 1; /* Needed to get last oligomer to match */
-#endif
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+ *(--pointers[masked]) = chrpos - 10;
+ }
+ }
- ptr = startptr = left/32U*3;
- endptr = left_plus_length/32U*3;
- startdiscard = left % 32; /* (left+pos5) % 32 */
- enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
-
- if (left_plus_length <= left) {
- /* Skip */
- } else if (startptr == endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+ *(--pointers[masked]) = chrpos - 11;
}
+ }
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
- if (indexsize == 8) {
- count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 7) {
- count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 6) {
- count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 5) {
- count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
} else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
- abort();
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+ *(--pointers[masked]) = chrpos - 12;
}
+ }
- } else {
- /* Genome_print_blocks(ref_blocks,left,left+16); */
-
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+ *(--pointers[masked]) = chrpos - 13;
}
+ }
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
- if (indexsize == 8) {
- count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 7) {
- count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 6) {
- count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 5) {
- count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
} else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
- abort();
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+ *(--pointers[masked]) = chrpos - 14;
}
+ }
- ptr += 3;
-
- if (indexsize == 8) {
- while (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
-
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
- count_8mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
- } else if (indexsize == 7) {
- while (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
-
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
- count_7mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
- } else if (indexsize == 6) {
- while (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
+ masked = low_rev >> 16; /* 16, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+ *(--pointers[masked]) = chrpos - 15;
+ }
+ }
#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
- count_6mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
- } else if (indexsize == 5) {
- while (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ oligo = low_rev >> 18; /* For 9..15 */
+ oligo |= high_rev << 14;
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK8; /* 15 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+ *(--pointers[masked]) = chrpos - 16;
+ }
+ }
- count_5mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ masked = (oligo >> 2) & MASK8; /* 14 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
} else {
- abort();
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+ *(--pointers[masked]) = chrpos - 17;
}
+ }
+ masked = (oligo >> 4) & MASK8; /* 13 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
+ masked = (oligo >> 6) & MASK8; /* 12 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+ *(--pointers[masked]) = chrpos - 19;
}
+ }
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = (oligo >> 8) & MASK8; /* 11 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+ *(--pointers[masked]) = chrpos - 20;
+ }
+ }
- if (indexsize == 8) {
- count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 7) {
- count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 6) {
- count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 5) {
- count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ masked = (oligo >> 10) & MASK8; /* 10 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
} else {
- abort();
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+ *(--pointers[masked]) = chrpos - 21;
}
+ }
+ masked = (oligo >> 12) & MASK8; /* 9 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+ *(--pointers[masked]) = chrpos - 22;
+ }
}
-
- return;
-}
-#endif
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
-#if 0
-/* For debugging of SIMD procedures*/
-static void
-print_vector (__m128i x, char *label) {
- __m128i a[1];
- unsigned int *s = a;
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+ *(--pointers[masked]) = chrpos - 16;
+ }
+ }
- _mm_store_si128(a,x);
- _mm_mfence();
- printf("%s: %u %u %u %u\n",label,s[0],s[1],s[2],s[3]);
- return;
-}
-#endif
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+ *(--pointers[masked]) = chrpos - 17;
+ }
+ }
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-count_positions_fwd_simd (Count_T *counts, int indexsize, Univcoord_T left, Univcoord_T left_plus_length,
- int genestrand) {
- int startdiscard, enddiscard;
- Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev,
- low, high, nextlow;
- Genomecomp_T high0_rev, low0_rev, high1_rev, low1_rev, /*low0,*/ high0, low1, high1;
- __m128i current, next;
- __m128i array[16];
-#ifdef HAVE_SSE4_1
- __m128i temp;
-#endif
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+ *(--pointers[masked]) = chrpos - 19;
+ }
+ }
- debug(printf("Starting count_positions_fwd_simd\n"));
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
- left_plus_length -= indexsize;
-#if 0
- /* No. Extends past end. */
- left_plus_length += 1; /* Needed to get last oligomer to match */
-#endif
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+ *(--pointers[masked]) = chrpos - 20;
+ }
+ }
- ptr = startptr = left/32U*3;
- endptr = left_plus_length/32U*3;
- startdiscard = left % 32; /* (left+pos5) % 32 */
- enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
-
- if (left_plus_length <= left) {
- /* Skip */
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+ *(--pointers[masked]) = chrpos - 21;
+ }
+ }
- } else if (startptr == endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+ *(--pointers[masked]) = chrpos - 22;
}
+ }
+#endif
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
- if (indexsize == 8) {
- count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 7) {
- count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 6) {
- count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 5) {
- count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rev & MASK8; /* 8 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
} else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
- abort();
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+ *(--pointers[masked]) = chrpos - 23;
}
+ }
- } else {
- /* Genome_print_blocks(ref_blocks,left,left+16); */
-
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
+ masked = (high_rev >> 2) & MASK8; /* 7 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+ *(--pointers[masked]) = chrpos - 24;
}
+ }
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = (high_rev >> 4) & MASK8; /* 6 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+ *(--pointers[masked]) = chrpos - 25;
+ }
+ }
- if (indexsize == 8) {
- count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 7) {
- count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 6) {
- count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 5) {
- count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ masked = (high_rev >> 6) & MASK8; /* 5 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
} else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
- abort();
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+ *(--pointers[masked]) = chrpos - 26;
}
+ }
- ptr += 3;
+ masked = (high_rev >> 8) & MASK8; /* 4 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+ *(--pointers[masked]) = chrpos - 27;
+ }
+ }
- if (indexsize == 8) {
- while (ptr + 3 < endptr) {
-#ifdef WORDS_BIGENDIAN
- high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
- high0 = ref_blocks[ptr];
- /* low0 = ref_blocks[ptr+1]; */
- high1 = ref_blocks[ptr+3];
- low1 = ref_blocks[ptr+4];
- nextlow = ref_blocks[ptr+7];
-#endif
- if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
- high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = (high_rev >> 10) & MASK8; /* 3 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+ *(--pointers[masked]) = chrpos - 28;
+ }
+ }
- high0_rev = nexthigh_rev; /* depended on low0 */
- low0_rev = reverse_nt[high0 >> 16];
- low0_rev |= (reverse_nt[high0 & 0x0000FFFF] << 16);
- high1_rev = reverse_nt[low1 >> 16];
- high1_rev |= (reverse_nt[low1 & 0x0000FFFF] << 16);
- low1_rev = reverse_nt[high1 >> 16];
- low1_rev |= (reverse_nt[high1 & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = (high_rev >> 12) & MASK8; /* 2 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+ *(--pointers[masked]) = chrpos - 29;
+ }
+ }
- current = _mm_setr_epi32(high0_rev,low0_rev,high1_rev,low1_rev);
-#ifdef HAVE_SSE4_1
- temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
- next = _mm_shuffle_epi32(temp,0x39);
-#else
- next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
+ masked = (high_rev >> 14) & MASK8; /* 1 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+ *(--pointers[masked]) = chrpos - 30;
+ }
+ }
- extract_8mers_fwd_simd(array,current,next);
- count_fwdrev_simd(counts,(Genomecomp_T *) array);
- ptr += 6;
- }
+ masked = high_rev >> 16; /* 0, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+ *(--pointers[masked]) = chrpos - 31;
+ }
+ }
- if (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+ *(--pointers[masked]) = chrpos - 23;
+ }
+ }
- count_8mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+ *(--pointers[masked]) = chrpos - 24;
+ }
+ }
- } else if (indexsize == 7) {
- while (ptr + 3 < endptr) {
-#ifdef WORDS_BIGENDIAN
- high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
- high0 = ref_blocks[ptr];
- /* low0 = ref_blocks[ptr+1]; */
- high1 = ref_blocks[ptr+3];
- low1 = ref_blocks[ptr+4];
- nextlow = ref_blocks[ptr+7];
-#endif
- if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
- high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+ *(--pointers[masked]) = chrpos - 25;
+ }
+ }
- high0_rev = nexthigh_rev; /* depended on low0 */
- low0_rev = reverse_nt[high0 >> 16];
- low0_rev |= (reverse_nt[high0 & 0x0000FFFF] << 16);
- high1_rev = reverse_nt[low1 >> 16];
- high1_rev |= (reverse_nt[low1 & 0x0000FFFF] << 16);
- low1_rev = reverse_nt[high1 >> 16];
- low1_rev |= (reverse_nt[high1 & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+ *(--pointers[masked]) = chrpos - 26;
+ }
+ }
- current = _mm_setr_epi32(high0_rev,low0_rev,high1_rev,low1_rev);
-#ifdef HAVE_SSE4_1
- temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
- next = _mm_shuffle_epi32(temp,0x39);
-#else
- next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
- extract_7mers_fwd_simd(array,current,next);
- count_fwdrev_simd(counts,(Genomecomp_T *) array);
- ptr += 6;
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
- if (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+ *(--pointers[masked]) = chrpos - 27;
+ }
+ }
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+ *(--pointers[masked]) = chrpos - 28;
+ }
+ }
- count_7mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+ *(--pointers[masked]) = chrpos - 29;
+ }
+ }
- } else if (indexsize == 6) {
- while (ptr + 3 < endptr) {
-#ifdef WORDS_BIGENDIAN
- high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
- high0 = ref_blocks[ptr];
- /* low0 = ref_blocks[ptr+1]; */
- high1 = ref_blocks[ptr+3];
- low1 = ref_blocks[ptr+4];
- nextlow = ref_blocks[ptr+7];
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+ *(--pointers[masked]) = chrpos - 30;
+ }
+ }
+
+ masked = high_rev >> 16; /* 0, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+ *(--pointers[masked]) = chrpos - 31;
+ }
+ }
#endif
- if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
- high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- nextlow = Cmet_reduce_ga(nextlow);
- }
- }
- high0_rev = nexthigh_rev; /* depended on low0 */
- low0_rev = reverse_nt[high0 >> 16];
- low0_rev |= (reverse_nt[high0 & 0x0000FFFF] << 16);
- high1_rev = reverse_nt[low1 >> 16];
- high1_rev |= (reverse_nt[low1 & 0x0000FFFF] << 16);
- low1_rev = reverse_nt[high1 >> 16];
- low1_rev |= (reverse_nt[high1 & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ return chrpos - 32;
+}
- current = _mm_setr_epi32(high0_rev,low0_rev,high1_rev,low1_rev);
-#ifdef HAVE_SSE4_1
- temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
- next = _mm_shuffle_epi32(temp,0x39);
-#else
- next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
- extract_6mers_fwd_simd(array,current,next);
- count_fwdrev_simd(counts,(Genomecomp_T *) array);
- ptr += 6;
- }
- if (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
+static void
+count_7mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
- }
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
- count_6mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ oligo = nexthigh_rev >> 20; /* For 31..26 */
+ oligo |= low_rev << 12;
- } else if (indexsize == 5) {
- while (ptr + 3 < endptr) {
-#ifdef WORDS_BIGENDIAN
- high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
- high0 = ref_blocks[ptr];
- /* low0 = ref_blocks[ptr+1]; */
- high1 = ref_blocks[ptr+3];
- low1 = ref_blocks[ptr+4];
- nextlow = ref_blocks[ptr+7];
-#endif
- if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
- high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK7; /* 31 */
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
- high0_rev = nexthigh_rev; /* depended on low0 */
- low0_rev = reverse_nt[high0 >> 16];
- low0_rev |= (reverse_nt[high0 & 0x0000FFFF] << 16);
- high1_rev = reverse_nt[low1 >> 16];
- high1_rev |= (reverse_nt[low1 & 0x0000FFFF] << 16);
- low1_rev = reverse_nt[high1 >> 16];
- low1_rev |= (reverse_nt[high1 & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = (oligo >> 2) & MASK7; /* 30 */
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
- current = _mm_setr_epi32(high0_rev,low0_rev,high1_rev,low1_rev);
-#ifdef HAVE_SSE4_1
- temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
- next = _mm_shuffle_epi32(temp,0x39);
-#else
- next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
+ masked = (oligo >> 4) & MASK7; /* 29 */
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
- extract_5mers_fwd_simd(array,current,next);
- count_fwdrev_simd(counts,(Genomecomp_T *) array);
- ptr += 6;
- }
+ masked = (oligo >> 6) & MASK7; /* 28 */
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 8) & MASK7; /* 27 */
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 10) & MASK7; /* 26 */
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
- if (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
- count_5mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
- } else {
- abort();
- }
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
- }
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
- if (indexsize == 8) {
- count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 7) {
- count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 6) {
- count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 5) {
- count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else {
- abort();
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
- }
-
- return;
-}
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
#endif
-#if (!defined(USE_SIMD_FOR_COUNTS) || defined(DEBUG14))
-static void
-store_positions_fwd_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts, int indexsize,
- Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
- int genestrand) {
- int startdiscard, enddiscard;
- Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev,
- low, high, nextlow;
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rev & MASK7; /* 25 */
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+ masked = (low_rev >> 2) & MASK7; /* 24 */
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
- left_plus_length -= indexsize;
-#if 0
- /* No. Extends past end. */
- left_plus_length += 1; /* Needed to get last oligomer to match */
-#endif
+ masked = (low_rev >> 4) & MASK7; /* 23 */
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- ptr = startptr = left/32U*3;
- endptr = left_plus_length/32U*3;
- startdiscard = left % 32; /* (left+pos5) % 32 */
- enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
-
- if (left_plus_length <= left) {
- /* Skip */
+ masked = (low_rev >> 6) & MASK7; /* 22 */
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
- } else if (startptr == endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = (low_rev >> 8) & MASK7; /* 21 */
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = (low_rev >> 10) & MASK7; /* 20 */
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
- if (indexsize == 8) {
- chrpos = store_8mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 7) {
- chrpos = store_7mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 6) {
- chrpos = store_6mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 5) {
- chrpos = store_5mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
- abort();
- }
+ masked = (low_rev >> 12) & MASK7; /* 19 */
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- } else {
- /* Genome_print_blocks(ref_blocks,left,left+16); */
+ masked = (low_rev >> 14) & MASK7; /* 18 */
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 16) & MASK7; /* 17 */
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = low_rev >> 18; /* 16, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
- if (indexsize == 8) {
- chrpos = store_8mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 7) {
- chrpos = store_7mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 6) {
- chrpos = store_6mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 5) {
- chrpos = store_5mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
- abort();
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
- ptr += 3;
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- if (indexsize == 8) {
- while (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
- chrpos = store_8mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
- } else if (indexsize == 7) {
- while (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
- chrpos = store_7mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- } else if (indexsize == 6) {
- while (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
- chrpos = store_6mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
- } else if (indexsize == 5) {
- while (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
- chrpos = store_5mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
- } else {
- abort();
- }
+ oligo = low_rev >> 20; /* For 15..10 */
+ oligo |= high_rev << 12;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK7; /* 15 */
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+ masked = (oligo >> 2) & MASK7; /* 14 */
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 4) & MASK7; /* 13 */
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 6) & MASK7; /* 12 */
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 8) & MASK7; /* 11 */
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 10) & MASK7; /* 10 */
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
- if (indexsize == 8) {
- chrpos = store_8mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 7) {
- chrpos = store_7mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 6) {
- chrpos = store_6mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 5) {
- chrpos = store_5mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else {
- abort();
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
- }
-
- return;
-}
-#endif
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts, int indexsize,
- Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
- int genestrand) {
- int startdiscard, enddiscard;
- Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev,
- low, high, nextlow;
- Genomecomp_T high0_rev, low0_rev, high1_rev, low1_rev, /* low0, */ high0, low1, high1;
- __m128i current, next;
- __m128i array[16];
-#ifdef HAVE_SSE4_1
- __m128i temp;
-#endif
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
- debug(printf("Starting store_positions_fwd_simd\n"));
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
- left_plus_length -= indexsize;
-#if 0
- /* No. Extends past end. */
- left_plus_length += 1; /* Needed to get last oligomer to match */
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
#endif
- ptr = startptr = left/32U*3;
- endptr = left_plus_length/32U*3;
- startdiscard = left % 32; /* (left+pos5) % 32 */
- enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
-
- if (left_plus_length <= left) {
- /* Skip */
- } else if (startptr == endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rev & MASK7; /* 9 */
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = (high_rev >> 2) & MASK7; /* 8 */
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
- if (indexsize == 8) {
- chrpos = store_8mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 7) {
- chrpos = store_7mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 6) {
- chrpos = store_6mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else if (indexsize == 5) {
- chrpos = store_5mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
- } else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
- abort();
- }
+ masked = (high_rev >> 4) & MASK7; /* 7 */
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
- } else {
- /* Genome_print_blocks(ref_blocks,left,left+16); */
+ masked = (high_rev >> 6) & MASK7; /* 6 */
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = (high_rev >> 8) & MASK7; /* 5 */
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
- high_rev = reverse_nt[low >> 16];
- high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = (high_rev >> 10) & MASK7; /* 4 */
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
- if (indexsize == 8) {
- chrpos = store_8mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 7) {
- chrpos = store_7mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 6) {
- chrpos = store_6mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else if (indexsize == 5) {
- chrpos = store_5mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
- } else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
- abort();
- }
+ masked = (high_rev >> 12) & MASK7; /* 3 */
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
- ptr += 3;
+ masked = (high_rev >> 14) & MASK7; /* 2 */
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
- if (indexsize == 8) {
- while (ptr + 3 < endptr) {
-#ifdef WORDS_BIGENDIAN
- high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
- high0 = ref_blocks[ptr];
- /* low0 = ref_blocks[ptr+1]; */
- high1 = ref_blocks[ptr+3];
- low1 = ref_blocks[ptr+4];
- nextlow = ref_blocks[ptr+7];
-#endif
- if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
- high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = (high_rev >> 16) & MASK7; /* 1 */
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
- high0_rev = nexthigh_rev; /* depended on low0 */
- low0_rev = reverse_nt[high0 >> 16];
- low0_rev |= (reverse_nt[high0 & 0x0000FFFF] << 16);
- high1_rev = reverse_nt[low1 >> 16];
- high1_rev |= (reverse_nt[low1 & 0x0000FFFF] << 16);
- low1_rev = reverse_nt[high1 >> 16];
- low1_rev |= (reverse_nt[high1 & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = high_rev >> 18; /* 0, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
- current = _mm_setr_epi32(high0_rev,low0_rev,high1_rev,low1_rev);
-#ifdef HAVE_SSE4_1
- temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
- next = _mm_shuffle_epi32(temp,0x39);
#else
- next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
+ _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
- extract_8mers_fwd_simd(array,current,next);
- chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
- ptr += 6;
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
- if (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
- chrpos = store_8mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
- } else if (indexsize == 7) {
- while (ptr + 3 < endptr) {
-#ifdef WORDS_BIGENDIAN
- high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
- high0 = ref_blocks[ptr];
- /* low0 = ref_blocks[ptr+1]; */
- high1 = ref_blocks[ptr+3];
- low1 = ref_blocks[ptr+4];
- nextlow = ref_blocks[ptr+7];
-#endif
- if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
- high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- nextlow = Cmet_reduce_ga(nextlow);
- }
- }
- high0_rev = nexthigh_rev; /* depended on low0 */
- low0_rev = reverse_nt[high0 >> 16];
- low0_rev |= (reverse_nt[high0 & 0x0000FFFF] << 16);
- high1_rev = reverse_nt[low1 >> 16];
- high1_rev |= (reverse_nt[low1 & 0x0000FFFF] << 16);
- low1_rev = reverse_nt[high1 >> 16];
- low1_rev |= (reverse_nt[high1 & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
- current = _mm_setr_epi32(high0_rev,low0_rev,high1_rev,low1_rev);
-#ifdef HAVE_SSE4_1
- temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
- next = _mm_shuffle_epi32(temp,0x39);
-#else
- next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
- extract_7mers_fwd_simd(array,current,next);
- chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
- ptr += 6;
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
- if (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
- chrpos = store_7mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
- } else if (indexsize == 6) {
- while (ptr + 3 < endptr) {
-#ifdef WORDS_BIGENDIAN
- high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
- high0 = ref_blocks[ptr];
- /* low0 = ref_blocks[ptr+1]; */
- high1 = ref_blocks[ptr+3];
- low1 = ref_blocks[ptr+4];
- nextlow = ref_blocks[ptr+7];
-#endif
- if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
- high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
- high0_rev = nexthigh_rev; /* depended on low0 */
- low0_rev = reverse_nt[high0 >> 16];
- low0_rev |= (reverse_nt[high0 & 0x0000FFFF] << 16);
- high1_rev = reverse_nt[low1 >> 16];
- high1_rev |= (reverse_nt[low1 & 0x0000FFFF] << 16);
- low1_rev = reverse_nt[high1 >> 16];
- low1_rev |= (reverse_nt[high1 & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
- current = _mm_setr_epi32(high0_rev,low0_rev,high1_rev,low1_rev);
-#ifdef HAVE_SSE4_1
- temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
- next = _mm_shuffle_epi32(temp,0x39);
-#else
- next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
#endif
- extract_6mers_fwd_simd(array,current,next);
- chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
- ptr += 6;
- }
+ return;
+}
- if (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+/* Expecting current to have {high0_rev, low0_rev, high1_rev,
+ low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and
+ high2_rev} */
+#ifdef USE_SIMD_FOR_COUNTS
+static void
+extract_7mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
+ __m128i oligo;
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary */
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
+ _mm_store_si128(out++, _mm_and_si128( current, mask7));
- chrpos = store_6mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+ oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
+ _mm_store_si128(out++, _mm_and_si128( oligo, mask7));
- } else if (indexsize == 5) {
- while (ptr + 3 < endptr) {
-#ifdef WORDS_BIGENDIAN
- high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
- high0 = ref_blocks[ptr];
- /* low0 = ref_blocks[ptr+1]; */
- high1 = ref_blocks[ptr+3];
- low1 = ref_blocks[ptr+4];
- nextlow = ref_blocks[ptr+7];
-#endif
- if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
- high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
- high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- nextlow = Cmet_reduce_ga(nextlow);
- }
- }
+ return;
+}
- high0_rev = nexthigh_rev; /* depended on low0 */
- low0_rev = reverse_nt[high0 >> 16];
- low0_rev |= (reverse_nt[high0 & 0x0000FFFF] << 16);
- high1_rev = reverse_nt[low1 >> 16];
- high1_rev |= (reverse_nt[low1 & 0x0000FFFF] << 16);
- low1_rev = reverse_nt[high1 >> 16];
- low1_rev |= (reverse_nt[high1 & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+static void
+count_7mers_fwd_simd (Count_T *counts, __m128i current, __m128i next) {
+ __m128i oligo;
+ Genomecomp_T array[4];
+
+ _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,18)); /* No mask necessary */
+ counts[array[0]] += 1; /* 0 */
+ counts[array[1]] += 1; /* 16 */
+ counts[array[2]] += 1; /* 32 */
+ counts[array[3]] += 1; /* 48 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
+ counts[array[0]] += 1; /* 1 */
+ counts[array[1]] += 1; /* 17 */
+ counts[array[2]] += 1; /* 33 */
+ counts[array[3]] += 1; /* 49 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
+ counts[array[0]] += 1; /* 2 */
+ counts[array[1]] += 1; /* 18 */
+ counts[array[2]] += 1; /* 34 */
+ counts[array[3]] += 1; /* 50 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
+ counts[array[0]] += 1; /* 3 */
+ counts[array[1]] += 1; /* 19 */
+ counts[array[2]] += 1; /* 35 */
+ counts[array[3]] += 1; /* 51 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
+ counts[array[0]] += 1; /* 4 */
+ counts[array[1]] += 1; /* 20 */
+ counts[array[2]] += 1; /* 36 */
+ counts[array[3]] += 1; /* 52 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
+ counts[array[0]] += 1; /* 5 */
+ counts[array[1]] += 1; /* 21 */
+ counts[array[2]] += 1; /* 37 */
+ counts[array[3]] += 1; /* 53 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
+ counts[array[0]] += 1; /* 6 */
+ counts[array[1]] += 1; /* 22 */
+ counts[array[2]] += 1; /* 38 */
+ counts[array[3]] += 1; /* 54 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
+ counts[array[0]] += 1; /* 7 */
+ counts[array[1]] += 1; /* 23 */
+ counts[array[2]] += 1; /* 39 */
+ counts[array[3]] += 1; /* 55 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
+ counts[array[0]] += 1; /* 8 */
+ counts[array[1]] += 1; /* 24 */
+ counts[array[2]] += 1; /* 40 */
+ counts[array[3]] += 1; /* 56 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask7));
+ counts[array[0]] += 1; /* 9 */
+ counts[array[1]] += 1; /* 25 */
+ counts[array[2]] += 1; /* 41 */
+ counts[array[3]] += 1; /* 57 */
+
+
+ oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12));
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
+ counts[array[0]] += 1; /* 10 */
+ counts[array[1]] += 1; /* 26 */
+ counts[array[2]] += 1; /* 42 */
+ counts[array[3]] += 1; /* 58 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
+ counts[array[0]] += 1; /* 11 */
+ counts[array[1]] += 1; /* 27 */
+ counts[array[2]] += 1; /* 43 */
+ counts[array[3]] += 1; /* 59 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
+ counts[array[0]] += 1; /* 12 */
+ counts[array[1]] += 1; /* 28 */
+ counts[array[2]] += 1; /* 44 */
+ counts[array[3]] += 1; /* 60 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
+ counts[array[0]] += 1; /* 13 */
+ counts[array[1]] += 1; /* 29 */
+ counts[array[2]] += 1; /* 45 */
+ counts[array[3]] += 1; /* 61 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
+ counts[array[0]] += 1; /* 14 */
+ counts[array[1]] += 1; /* 30 */
+ counts[array[2]] += 1; /* 46 */
+ counts[array[3]] += 1; /* 62 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask7));
+ counts[array[0]] += 1; /* 15 */
+ counts[array[1]] += 1; /* 31 */
+ counts[array[2]] += 1; /* 47 */
+ counts[array[3]] += 1; /* 63 */
+
+ return;
+}
- current = _mm_setr_epi32(high0_rev,low0_rev,high1_rev,low1_rev);
-#ifdef HAVE_SSE4_1
- temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
- next = _mm_shuffle_epi32(temp,0x39);
-#else
- next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
- extract_5mers_fwd_simd(array,current,next);
- chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
- ptr += 6;
- }
- if (ptr < endptr) {
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
+static int
+store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+ Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
+
+
+ oligo = nexthigh_rev >> 20; /* For 31..26 */
+ oligo |= low_rev << 12;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK7; /* 31 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+
+ masked = (oligo >> 2) & MASK7; /* 30 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 1;
+ }
+ }
+
+ masked = (oligo >> 4) & MASK7; /* 29 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 2;
+ }
+ }
+
+ masked = (oligo >> 6) & MASK7; /* 28 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 3;
+ }
+ }
+
+ masked = (oligo >> 8) & MASK7; /* 27 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 4;
+ }
+ }
+
+ masked = (oligo >> 10) & MASK7; /* 26 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 5;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 1;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 2;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 3;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 4;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 5;
+ }
+ }
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rev & MASK7; /* 25 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 6;
+ }
+ }
+
+ masked = (low_rev >> 2) & MASK7; /* 24 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 7;
+ }
+ }
+
+ masked = (low_rev >> 4) & MASK7; /* 23 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 8;
+ }
+ }
+
+ masked = (low_rev >> 6) & MASK7; /* 22 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 9;
+ }
+ }
+
+ masked = (low_rev >> 8) & MASK7; /* 21 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 10;
+ }
+ }
+
+ masked = (low_rev >> 10) & MASK7; /* 20 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 11;
+ }
+ }
+
+ masked = (low_rev >> 12) & MASK7; /* 19 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 12;
+ }
+ }
+
+ masked = (low_rev >> 14) & MASK7; /* 18 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 13;
+ }
+ }
+
+ masked = (low_rev >> 16) & MASK7; /* 17 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 14;
+ }
+ }
+
+ masked = low_rev >> 18; /* 16, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 15;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 6;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 7;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 8;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 9;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 10;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 11;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 12;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 13;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 14;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 15;
+ }
+ }
+#endif
+
+
+ oligo = low_rev >> 20; /* For 15..10 */
+ oligo |= high_rev << 12;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK7; /* 15 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 16;
+ }
+ }
+
+ masked = (oligo >> 2) & MASK7; /* 14 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 17;
+ }
+ }
+
+ masked = (oligo >> 4) & MASK7; /* 13 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
+
+ masked = (oligo >> 6) & MASK7; /* 12 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 19;
+ }
+ }
+
+ masked = (oligo >> 8) & MASK7; /* 11 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 20;
+ }
+ }
+
+ masked = (oligo >> 10) & MASK7; /* 10 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 21;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 16;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 17;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 19;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 20;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 21;
+ }
+ }
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rev & MASK7; /* 9 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 22;
+ }
+ }
+
+ masked = (high_rev >> 2) & MASK7; /* 8 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 23;
+ }
+ }
+
+ masked = (high_rev >> 4) & MASK7; /* 7 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 24;
+ }
+ }
+
+ masked = (high_rev >> 6) & MASK7; /* 6 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 25;
+ }
+ }
+
+ masked = (high_rev >> 8) & MASK7; /* 5 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 26;
+ }
+ }
+
+ masked = (high_rev >> 10) & MASK7; /* 4 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 27;
+ }
+ }
+
+ masked = (high_rev >> 12) & MASK7; /* 3 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 28;
+ }
+ }
+
+ masked = (high_rev >> 14) & MASK7; /* 2 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 29;
+ }
+ }
+
+ masked = (high_rev >> 16) & MASK7; /* 1 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 30;
+ }
+ }
+
+ masked = high_rev >> 18; /* 0, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 31;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 22;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 23;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 24;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 25;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 26;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 27;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 28;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 29;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 30;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 31;
+ }
+ }
+#endif
+
+ return chrpos - 32;
+}
+
+
+/* Adds one to counts[] for each of the 32 oligomer codes (labelled 31
+   down to 0 in the debug output) read from low_rev and high_rev.  The
+   top bits of nexthigh_rev complete the codes that run past the low
+   end of low_rev, and the top bits of low_rev complete those that run
+   past the low end of high_rev.  Every code is selected with MASK6 (or
+   its vector counterpart mask6); presumably that is 12 bits, i.e. a
+   6-mer at 2 bits per base -- NOTE(review): confirm against the MASK6
+   definition.
+
+   With INDIVIDUAL_SHIFTS defined, every code is extracted by a scalar
+   shift and mask.  Otherwise four shifted copies of a word are masked
+   together in an SSE register and read back one lane at a time. */
+static void
+count_6mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T kmer, straddle;
+#ifndef INDIVIDUAL_SHIFTS
+  __m128i shifted, lanes;
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+  /* 31..27: codes that straddle low_rev and nexthigh_rev */
+  straddle = (nexthigh_rev >> 22) | (low_rev << 10);
+
+  kmer = straddle & MASK6;
+  counts[kmer]++;
+  debug(printf("31 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (straddle >> 2) & MASK6;
+  counts[kmer]++;
+  debug(printf("30 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (straddle >> 4) & MASK6;
+  counts[kmer]++;
+  debug(printf("29 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (straddle >> 6) & MASK6;
+  counts[kmer]++;
+  debug(printf("28 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (straddle >> 8) & MASK6;
+  counts[kmer]++;
+  debug(printf("27 %04X => %d\n",kmer,counts[kmer]));
+
+  /* 26..16: codes lying entirely within low_rev */
+  kmer = low_rev & MASK6;
+  counts[kmer]++;
+  debug(printf("26 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (low_rev >> 2) & MASK6;
+  counts[kmer]++;
+  debug(printf("25 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (low_rev >> 4) & MASK6;
+  counts[kmer]++;
+  debug(printf("24 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (low_rev >> 6) & MASK6;
+  counts[kmer]++;
+  debug(printf("23 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (low_rev >> 8) & MASK6;
+  counts[kmer]++;
+  debug(printf("22 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (low_rev >> 10) & MASK6;
+  counts[kmer]++;
+  debug(printf("21 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (low_rev >> 12) & MASK6;
+  counts[kmer]++;
+  debug(printf("20 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (low_rev >> 14) & MASK6;
+  counts[kmer]++;
+  debug(printf("19 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (low_rev >> 16) & MASK6;
+  counts[kmer]++;
+  debug(printf("18 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (low_rev >> 18) & MASK6;
+  counts[kmer]++;
+  debug(printf("17 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = low_rev >> 20;		/* No mask necessary */
+  counts[kmer]++;
+  debug(printf("16 %04X => %d\n",kmer,counts[kmer]));
+
+  /* 15..11: codes that straddle high_rev and low_rev */
+  straddle = (low_rev >> 22) | (high_rev << 10);
+
+  kmer = straddle & MASK6;
+  counts[kmer]++;
+  debug(printf("15 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (straddle >> 2) & MASK6;
+  counts[kmer]++;
+  debug(printf("14 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (straddle >> 4) & MASK6;
+  counts[kmer]++;
+  debug(printf("13 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (straddle >> 6) & MASK6;
+  counts[kmer]++;
+  debug(printf("12 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (straddle >> 8) & MASK6;
+  counts[kmer]++;
+  debug(printf("11 %04X => %d\n",kmer,counts[kmer]));
+
+  /* 10..0: codes lying entirely within high_rev */
+  kmer = high_rev & MASK6;
+  counts[kmer]++;
+  debug(printf("10 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (high_rev >> 2) & MASK6;
+  counts[kmer]++;
+  debug(printf("9 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (high_rev >> 4) & MASK6;
+  counts[kmer]++;
+  debug(printf("8 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (high_rev >> 6) & MASK6;
+  counts[kmer]++;
+  debug(printf("7 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (high_rev >> 8) & MASK6;
+  counts[kmer]++;
+  debug(printf("6 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (high_rev >> 10) & MASK6;
+  counts[kmer]++;
+  debug(printf("5 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (high_rev >> 12) & MASK6;
+  counts[kmer]++;
+  debug(printf("4 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (high_rev >> 14) & MASK6;
+  counts[kmer]++;
+  debug(printf("3 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (high_rev >> 16) & MASK6;
+  counts[kmer]++;
+  debug(printf("2 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (high_rev >> 18) & MASK6;
+  counts[kmer]++;
+  debug(printf("1 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = high_rev >> 20;		/* No mask necessary */
+  counts[kmer]++;
+  debug(printf("0 %04X => %d\n",kmer,counts[kmer]));
+
+#else
+  /* 31..27: codes that straddle low_rev and nexthigh_rev.  Lanes 0..3
+     hold the word shifted right by 0, 2, 4 and 6 bits. */
+  straddle = (nexthigh_rev >> 22) | (low_rev << 10);
+  shifted = _mm_setr_epi32(straddle, straddle >> 2, straddle >> 4, straddle >> 6);
+  lanes = _mm_and_si128(shifted, mask6);
+
+  kmer = _mm_extract_epi32(lanes,0);
+  counts[kmer]++;
+  debug(printf("31 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,1);
+  counts[kmer]++;
+  debug(printf("30 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,2);
+  counts[kmer]++;
+  debug(printf("29 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,3);
+  counts[kmer]++;
+  debug(printf("28 %04X => %d\n",kmer,counts[kmer]));
+
+  /* The fifth straddling code is taken with a scalar shift */
+  kmer = (straddle >> 8) & MASK6;
+  counts[kmer]++;
+  debug(printf("27 %04X => %d\n",kmer,counts[kmer]));
+
+  /* 26..16: codes within low_rev; each further 8-bit vector shift
+     exposes the next four codes */
+  shifted = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+  lanes = _mm_and_si128(shifted, mask6);
+
+  kmer = _mm_extract_epi32(lanes,0);
+  counts[kmer]++;
+  debug(printf("26 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,1);
+  counts[kmer]++;
+  debug(printf("25 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,2);
+  counts[kmer]++;
+  debug(printf("24 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,3);
+  counts[kmer]++;
+  debug(printf("23 %04X => %d\n",kmer,counts[kmer]));
+
+  shifted = _mm_srli_epi32(shifted, 8);
+  lanes = _mm_and_si128(shifted, mask6);
+
+  kmer = _mm_extract_epi32(lanes,0);
+  counts[kmer]++;
+  debug(printf("22 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,1);
+  counts[kmer]++;
+  debug(printf("21 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,2);
+  counts[kmer]++;
+  debug(printf("20 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,3);
+  counts[kmer]++;
+  debug(printf("19 %04X => %d\n",kmer,counts[kmer]));
+
+  shifted = _mm_srli_epi32(shifted, 8);
+  lanes = _mm_and_si128(shifted, mask6);
+
+  /* Only lanes 0..2 are used after the second shift */
+  kmer = _mm_extract_epi32(lanes,0);
+  counts[kmer]++;
+  debug(printf("18 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,1);
+  counts[kmer]++;
+  debug(printf("17 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,2);
+  counts[kmer]++;
+  debug(printf("16 %04X => %d\n",kmer,counts[kmer]));
+
+  /* 15..11: codes that straddle high_rev and low_rev */
+  straddle = (low_rev >> 22) | (high_rev << 10);
+  shifted = _mm_setr_epi32(straddle, straddle >> 2, straddle >> 4, straddle >> 6);
+  lanes = _mm_and_si128(shifted, mask6);
+
+  kmer = _mm_extract_epi32(lanes,0);
+  counts[kmer]++;
+  debug(printf("15 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,1);
+  counts[kmer]++;
+  debug(printf("14 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,2);
+  counts[kmer]++;
+  debug(printf("13 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,3);
+  counts[kmer]++;
+  debug(printf("12 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = (straddle >> 8) & MASK6;
+  counts[kmer]++;
+  debug(printf("11 %04X => %d\n",kmer,counts[kmer]));
+
+  /* 10..0: codes within high_rev */
+  shifted = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+  lanes = _mm_and_si128(shifted, mask6);
+
+  kmer = _mm_extract_epi32(lanes,0);
+  counts[kmer]++;
+  debug(printf("10 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,1);
+  counts[kmer]++;
+  debug(printf("9 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,2);
+  counts[kmer]++;
+  debug(printf("8 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,3);
+  counts[kmer]++;
+  debug(printf("7 %04X => %d\n",kmer,counts[kmer]));
+
+  shifted = _mm_srli_epi32(shifted, 8);
+  lanes = _mm_and_si128(shifted, mask6);
+
+  kmer = _mm_extract_epi32(lanes,0);
+  counts[kmer]++;
+  debug(printf("6 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,1);
+  counts[kmer]++;
+  debug(printf("5 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,2);
+  counts[kmer]++;
+  debug(printf("4 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,3);
+  counts[kmer]++;
+  debug(printf("3 %04X => %d\n",kmer,counts[kmer]));
+
+  shifted = _mm_srli_epi32(shifted, 8);
+  lanes = _mm_and_si128(shifted, mask6);
+
+  kmer = _mm_extract_epi32(lanes,0);
+  counts[kmer]++;
+  debug(printf("2 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,1);
+  counts[kmer]++;
+  debug(printf("1 %04X => %d\n",kmer,counts[kmer]));
+
+  kmer = _mm_extract_epi32(lanes,2);
+  counts[kmer]++;
+  debug(printf("0 %04X => %d\n",kmer,counts[kmer]));
+#endif
+
+  return;
+}
+
+
+/* Expects current to hold {high0_rev, low0_rev, high1_rev,
+   low1_rev} and next to hold {low0_rev, high1_rev, low1_rev,
+   high2_rev}, i.e., each lane of next is the word that follows the
+   corresponding lane of current. */
+#ifdef USE_SIMD_FOR_COUNTS
+/* Writes sixteen vectors of oligomer codes to out[0..15].  Each lane of
+   every output vector is taken from the matching lane of current.
+   out[0..10] come from current alone, shifted right by 20, 18, ..., 0
+   bits.  out[11..15] come from a word that joins the low 22 bits of
+   current with the top 10 bits of the matching lane of next, shifted
+   right by 8, 6, ..., 0 bits.  All but out[0] are masked with mask6.
+   The stores are aligned, so out must be 16-byte aligned. */
+static void
+extract_6mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
+  __m128i straddle;
+
+  /* Codes held entirely within the current word */
+  _mm_store_si128(&out[0], _mm_srli_epi32(current,20)); /* No mask necessary */
+  _mm_store_si128(&out[1], _mm_and_si128(_mm_srli_epi32(current,18), mask6));
+  _mm_store_si128(&out[2], _mm_and_si128(_mm_srli_epi32(current,16), mask6));
+  _mm_store_si128(&out[3], _mm_and_si128(_mm_srli_epi32(current,14), mask6));
+  _mm_store_si128(&out[4], _mm_and_si128(_mm_srli_epi32(current,12), mask6));
+  _mm_store_si128(&out[5], _mm_and_si128(_mm_srli_epi32(current,10), mask6));
+  _mm_store_si128(&out[6], _mm_and_si128(_mm_srli_epi32(current,8), mask6));
+  _mm_store_si128(&out[7], _mm_and_si128(_mm_srli_epi32(current,6), mask6));
+  _mm_store_si128(&out[8], _mm_and_si128(_mm_srli_epi32(current,4), mask6));
+  _mm_store_si128(&out[9], _mm_and_si128(_mm_srli_epi32(current,2), mask6));
+  _mm_store_si128(&out[10], _mm_and_si128(current, mask6));
+
+  /* Codes that run from the current word into the next one */
+  straddle = _mm_or_si128(_mm_srli_epi32(next,22), _mm_slli_epi32(current,10));
+  _mm_store_si128(&out[11], _mm_and_si128(_mm_srli_epi32(straddle,8), mask6));
+  _mm_store_si128(&out[12], _mm_and_si128(_mm_srli_epi32(straddle,6), mask6));
+  _mm_store_si128(&out[13], _mm_and_si128(_mm_srli_epi32(straddle,4), mask6));
+  _mm_store_si128(&out[14], _mm_and_si128(_mm_srli_epi32(straddle,2), mask6));
+  _mm_store_si128(&out[15], _mm_and_si128(straddle, mask6));
+
+  return;
+}
+
+/* Adds one to counts[] for each of 64 oligomer codes, handling four
+   32-bit lanes per step.  Each lane of current yields 16 codes: 11
+   from the word itself (right shifts of 20 down to 0 bits) and 5 from
+   a word that joins the low 22 bits of current with the top 10 bits of
+   the matching lane of next.  The trailing comments give the label of
+   each code (lane 0 covers 0..15, lane 1 covers 16..31, and so on).
+
+   Each masked vector is spilled to a small scratch array and its four
+   lanes are used as indices into counts[].  The scratch array is a
+   plain Genomecomp_T array, for which the C language guarantees only
+   the alignment of Genomecomp_T, so the spill uses the unaligned store
+   _mm_storeu_si128; the aligned _mm_store_si128 faults when its
+   destination is not 16-byte aligned.
+
+   NOTE(review): assumes Genomecomp_T is a 32-bit type, so that
+   array[4] spans exactly one __m128i -- confirm against its typedef. */
+static void
+count_6mers_fwd_simd (Count_T *counts, __m128i current, __m128i next) {
+  __m128i oligo;
+  Genomecomp_T array[4];
+
+  _mm_storeu_si128((__m128i *) array, _mm_srli_epi32(current,20)); /* No mask necessary */
+  counts[array[0]] += 1;	/* 0 */
+  counts[array[1]] += 1;	/* 16 */
+  counts[array[2]] += 1;	/* 32 */
+  counts[array[3]] += 1;	/* 48 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
+  counts[array[0]] += 1;	/* 1 */
+  counts[array[1]] += 1;	/* 17 */
+  counts[array[2]] += 1;	/* 33 */
+  counts[array[3]] += 1;	/* 49 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
+  counts[array[0]] += 1;	/* 2 */
+  counts[array[1]] += 1;	/* 18 */
+  counts[array[2]] += 1;	/* 34 */
+  counts[array[3]] += 1;	/* 50 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
+  counts[array[0]] += 1;	/* 3 */
+  counts[array[1]] += 1;	/* 19 */
+  counts[array[2]] += 1;	/* 35 */
+  counts[array[3]] += 1;	/* 51 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask6));
+  counts[array[0]] += 1;	/* 4 */
+  counts[array[1]] += 1;	/* 20 */
+  counts[array[2]] += 1;	/* 36 */
+  counts[array[3]] += 1;	/* 52 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask6));
+  counts[array[0]] += 1;	/* 5 */
+  counts[array[1]] += 1;	/* 21 */
+  counts[array[2]] += 1;	/* 37 */
+  counts[array[3]] += 1;	/* 53 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask6));
+  counts[array[0]] += 1;	/* 6 */
+  counts[array[1]] += 1;	/* 22 */
+  counts[array[2]] += 1;	/* 38 */
+  counts[array[3]] += 1;	/* 54 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask6));
+  counts[array[0]] += 1;	/* 7 */
+  counts[array[1]] += 1;	/* 23 */
+  counts[array[2]] += 1;	/* 39 */
+  counts[array[3]] += 1;	/* 55 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask6));
+  counts[array[0]] += 1;	/* 8 */
+  counts[array[1]] += 1;	/* 24 */
+  counts[array[2]] += 1;	/* 40 */
+  counts[array[3]] += 1;	/* 56 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask6));
+  counts[array[0]] += 1;	/* 9 */
+  counts[array[1]] += 1;	/* 25 */
+  counts[array[2]] += 1;	/* 41 */
+  counts[array[3]] += 1;	/* 57 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( current, mask6));
+  counts[array[0]] += 1;	/* 10 */
+  counts[array[1]] += 1;	/* 26 */
+  counts[array[2]] += 1;	/* 42 */
+  counts[array[3]] += 1;	/* 58 */
+
+
+  /* Remaining codes run from each current word into the matching next word */
+  oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10));
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6));
+  counts[array[0]] += 1;	/* 11 */
+  counts[array[1]] += 1;	/* 27 */
+  counts[array[2]] += 1;	/* 43 */
+  counts[array[3]] += 1;	/* 59 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6));
+  counts[array[0]] += 1;	/* 12 */
+  counts[array[1]] += 1;	/* 28 */
+  counts[array[2]] += 1;	/* 44 */
+  counts[array[3]] += 1;	/* 60 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6));
+  counts[array[0]] += 1;	/* 13 */
+  counts[array[1]] += 1;	/* 29 */
+  counts[array[2]] += 1;	/* 45 */
+  counts[array[3]] += 1;	/* 61 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
+  counts[array[0]] += 1;	/* 14 */
+  counts[array[1]] += 1;	/* 30 */
+  counts[array[2]] += 1;	/* 46 */
+  counts[array[3]] += 1;	/* 62 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( oligo, mask6));
+  counts[array[0]] += 1;	/* 15 */
+  counts[array[1]] += 1;	/* 31 */
+  counts[array[2]] += 1;	/* 47 */
+  counts[array[3]] += 1;	/* 63 */
+
+  return;
+}
+
+#endif
+
+
+/* Helper for store_6mers_fwd: files POSITION under the oligomer code
+   MASKED.  Nothing happens when counts[masked] is zero.  Otherwise, if
+   pointers[masked] has already come down to positions[masked], the
+   count is reset to zero (presumably the block for this code has no
+   room left -- NOTE(review): confirm with the code that sets up
+   positions); if not, the pointer is pre-decremented and POSITION is
+   written at the new location. */
+static void
+store_6mer_fwd_at (Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+		   Genomecomp_T masked, Chrpos_T position) {
+  if (counts[masked]) {
+    if (pointers[masked] != positions[masked]) {
+      *(--pointers[masked]) = position;
+    } else {
+      counts[masked] = 0;
+    }
+  }
+
+  return;
+}
+
+
+/* Visits the same 32 oligomer codes as count_6mers_fwd, in the same
+   order (labels 31 down to 0), and files a position for each one whose
+   count is nonzero: label 31 gets chrpos, label 30 gets chrpos - 1,
+   and so on down to label 0, which gets chrpos - 31.  Positions are
+   written through pointers[], which moves downward, so the order of
+   the calls below matters.  Returns chrpos - 32.
+
+   With INDIVIDUAL_SHIFTS defined, every code is extracted by a scalar
+   shift and mask.  Otherwise four shifted copies of a word are masked
+   together in an SSE register and read back one lane at a time. */
+static int
+store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T straddle;
+#ifndef INDIVIDUAL_SHIFTS
+  __m128i shifted, lanes;
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+  /* 31..27: codes that straddle low_rev and nexthigh_rev */
+  straddle = (nexthigh_rev >> 22) | (low_rev << 10);
+  store_6mer_fwd_at(pointers,positions,counts, straddle & MASK6, chrpos);		/* 31 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 2) & MASK6, chrpos - 1);	/* 30 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 4) & MASK6, chrpos - 2);	/* 29 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 6) & MASK6, chrpos - 3);	/* 28 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 8) & MASK6, chrpos - 4);	/* 27 */
+
+  /* 26..16: codes lying entirely within low_rev */
+  store_6mer_fwd_at(pointers,positions,counts, low_rev & MASK6, chrpos - 5);		/* 26 */
+  store_6mer_fwd_at(pointers,positions,counts, (low_rev >> 2) & MASK6, chrpos - 6);	/* 25 */
+  store_6mer_fwd_at(pointers,positions,counts, (low_rev >> 4) & MASK6, chrpos - 7);	/* 24 */
+  store_6mer_fwd_at(pointers,positions,counts, (low_rev >> 6) & MASK6, chrpos - 8);	/* 23 */
+  store_6mer_fwd_at(pointers,positions,counts, (low_rev >> 8) & MASK6, chrpos - 9);	/* 22 */
+  store_6mer_fwd_at(pointers,positions,counts, (low_rev >> 10) & MASK6, chrpos - 10);	/* 21 */
+  store_6mer_fwd_at(pointers,positions,counts, (low_rev >> 12) & MASK6, chrpos - 11);	/* 20 */
+  store_6mer_fwd_at(pointers,positions,counts, (low_rev >> 14) & MASK6, chrpos - 12);	/* 19 */
+  store_6mer_fwd_at(pointers,positions,counts, (low_rev >> 16) & MASK6, chrpos - 13);	/* 18 */
+  store_6mer_fwd_at(pointers,positions,counts, (low_rev >> 18) & MASK6, chrpos - 14);	/* 17 */
+  store_6mer_fwd_at(pointers,positions,counts, low_rev >> 20, chrpos - 15);		/* 16, No mask necessary */
+
+  /* 15..11: codes that straddle high_rev and low_rev */
+  straddle = (low_rev >> 22) | (high_rev << 10);
+  store_6mer_fwd_at(pointers,positions,counts, straddle & MASK6, chrpos - 16);		/* 15 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 2) & MASK6, chrpos - 17);	/* 14 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 4) & MASK6, chrpos - 18);	/* 13 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 6) & MASK6, chrpos - 19);	/* 12 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 8) & MASK6, chrpos - 20);	/* 11 */
+
+  /* 10..0: codes lying entirely within high_rev */
+  store_6mer_fwd_at(pointers,positions,counts, high_rev & MASK6, chrpos - 21);		/* 10 */
+  store_6mer_fwd_at(pointers,positions,counts, (high_rev >> 2) & MASK6, chrpos - 22);	/* 9 */
+  store_6mer_fwd_at(pointers,positions,counts, (high_rev >> 4) & MASK6, chrpos - 23);	/* 8 */
+  store_6mer_fwd_at(pointers,positions,counts, (high_rev >> 6) & MASK6, chrpos - 24);	/* 7 */
+  store_6mer_fwd_at(pointers,positions,counts, (high_rev >> 8) & MASK6, chrpos - 25);	/* 6 */
+  store_6mer_fwd_at(pointers,positions,counts, (high_rev >> 10) & MASK6, chrpos - 26);	/* 5 */
+  store_6mer_fwd_at(pointers,positions,counts, (high_rev >> 12) & MASK6, chrpos - 27);	/* 4 */
+  store_6mer_fwd_at(pointers,positions,counts, (high_rev >> 14) & MASK6, chrpos - 28);	/* 3 */
+  store_6mer_fwd_at(pointers,positions,counts, (high_rev >> 16) & MASK6, chrpos - 29);	/* 2 */
+  store_6mer_fwd_at(pointers,positions,counts, (high_rev >> 18) & MASK6, chrpos - 30);	/* 1 */
+  store_6mer_fwd_at(pointers,positions,counts, high_rev >> 20, chrpos - 31);		/* 0, No mask necessary */
+
+#else
+  /* 31..27: codes that straddle low_rev and nexthigh_rev.  Lanes 0..3
+     hold the word shifted right by 0, 2, 4 and 6 bits; the fifth code
+     is taken with a scalar shift. */
+  straddle = (nexthigh_rev >> 22) | (low_rev << 10);
+  shifted = _mm_setr_epi32(straddle, straddle >> 2, straddle >> 4, straddle >> 6);
+  lanes = _mm_and_si128(shifted, mask6);
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,0), chrpos);	/* 31 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,1), chrpos - 1);	/* 30 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,2), chrpos - 2);	/* 29 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,3), chrpos - 3);	/* 28 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 8) & MASK6, chrpos - 4);	/* 27 */
+
+  /* 26..16: codes within low_rev; each further 8-bit vector shift
+     exposes the next four codes */
+  shifted = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+  lanes = _mm_and_si128(shifted, mask6);
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,0), chrpos - 5);	/* 26 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,1), chrpos - 6);	/* 25 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,2), chrpos - 7);	/* 24 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,3), chrpos - 8);	/* 23 */
+
+  shifted = _mm_srli_epi32(shifted, 8);
+  lanes = _mm_and_si128(shifted, mask6);
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,0), chrpos - 9);	/* 22 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,1), chrpos - 10);	/* 21 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,2), chrpos - 11);	/* 20 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,3), chrpos - 12);	/* 19 */
+
+  /* Only lanes 0..2 are used after the second shift */
+  shifted = _mm_srli_epi32(shifted, 8);
+  lanes = _mm_and_si128(shifted, mask6);
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,0), chrpos - 13);	/* 18 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,1), chrpos - 14);	/* 17 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,2), chrpos - 15);	/* 16 */
+
+  /* 15..11: codes that straddle high_rev and low_rev */
+  straddle = (low_rev >> 22) | (high_rev << 10);
+  shifted = _mm_setr_epi32(straddle, straddle >> 2, straddle >> 4, straddle >> 6);
+  lanes = _mm_and_si128(shifted, mask6);
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,0), chrpos - 16);	/* 15 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,1), chrpos - 17);	/* 14 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,2), chrpos - 18);	/* 13 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,3), chrpos - 19);	/* 12 */
+  store_6mer_fwd_at(pointers,positions,counts, (straddle >> 8) & MASK6, chrpos - 20);	/* 11 */
+
+  /* 10..0: codes within high_rev */
+  shifted = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+  lanes = _mm_and_si128(shifted, mask6);
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,0), chrpos - 21);	/* 10 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,1), chrpos - 22);	/* 9 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,2), chrpos - 23);	/* 8 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,3), chrpos - 24);	/* 7 */
+
+  shifted = _mm_srli_epi32(shifted, 8);
+  lanes = _mm_and_si128(shifted, mask6);
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,0), chrpos - 25);	/* 6 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,1), chrpos - 26);	/* 5 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,2), chrpos - 27);	/* 4 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,3), chrpos - 28);	/* 3 */
+
+  shifted = _mm_srli_epi32(shifted, 8);
+  lanes = _mm_and_si128(shifted, mask6);
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,0), chrpos - 29);	/* 2 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,1), chrpos - 30);	/* 1 */
+  store_6mer_fwd_at(pointers,positions,counts, _mm_extract_epi32(lanes,2), chrpos - 31);	/* 0 */
+#endif
+
+
+  return chrpos - 32;
+}
+
+
+static void
+count_5mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
+
+
+ oligo = nexthigh_rev >> 24; /* For 31..28 */
+ oligo |= low_rev << 8;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK5; /* 31 */
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 2) & MASK5; /* 30 */
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 4) & MASK5; /* 29 */
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 6) & MASK5; /* 28 */
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rev & MASK5; /* 27 */
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 2) & MASK5; /* 26 */
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 4) & MASK5; /* 25 */
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 6) & MASK5; /* 24 */
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 8) & MASK5; /* 23 */
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 10) & MASK5; /* 22 */
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 12) & MASK5; /* 21 */
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 14) & MASK5; /* 20 */
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 16) & MASK5; /* 19 */
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 18) & MASK5; /* 18 */
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rev >> 20) & MASK5; /* 17 */
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = low_rev >> 22; /* 16, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+ oligo = low_rev >> 24; /* For 15..12 */
+ oligo |= high_rev << 8;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK5; /* 15 */
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 2) & MASK5; /* 14 */
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 4) & MASK5; /* 13 */
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 6) & MASK5; /* 12 */
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rev & MASK5; /* 11 */
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 2) & MASK5; /* 10 */
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 4) & MASK5; /* 9 */
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 6) & MASK5; /* 8 */
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 8) & MASK5; /* 7 */
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 10) & MASK5; /* 6 */
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 12) & MASK5; /* 5 */
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 14) & MASK5; /* 4 */
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 16) & MASK5; /* 3 */
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 18) & MASK5; /* 2 */
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rev >> 20) & MASK5; /* 1 */
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = high_rev >> 22; /* 0, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+#endif
+
+ return;
+}
+
+
+#ifdef USE_SIMD_FOR_COUNTS
+/* Writes sixteen vectors to out[0..15].  Every vector holds, in each of
+   its four 32-bit lanes, one bit window taken from the matching lane of
+   current (slots 0..11) or from current joined with the top byte of next
+   (slots 12..15).  Windows are selected with mask5, which is defined
+   elsewhere in this file (presumably the low 10 bits, i.e. one 5-mer --
+   TODO confirm).  out is written with aligned stores. */
+static void
+extract_5mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
+  __m128i spanning;
+
+  /* Slot 0: a right shift by 22 leaves only the top 10 bits of each lane,
+     so no mask is applied. */
+  _mm_store_si128(&out[0], _mm_srli_epi32(current,22));
+
+  /* Slots 1..11: each window starts 2 bits lower than the previous one. */
+  _mm_store_si128(&out[1], _mm_and_si128(_mm_srli_epi32(current,20), mask5));
+  _mm_store_si128(&out[2], _mm_and_si128(_mm_srli_epi32(current,18), mask5));
+  _mm_store_si128(&out[3], _mm_and_si128(_mm_srli_epi32(current,16), mask5));
+  _mm_store_si128(&out[4], _mm_and_si128(_mm_srli_epi32(current,14), mask5));
+  _mm_store_si128(&out[5], _mm_and_si128(_mm_srli_epi32(current,12), mask5));
+  _mm_store_si128(&out[6], _mm_and_si128(_mm_srli_epi32(current,10), mask5));
+  _mm_store_si128(&out[7], _mm_and_si128(_mm_srli_epi32(current,8), mask5));
+  _mm_store_si128(&out[8], _mm_and_si128(_mm_srli_epi32(current,6), mask5));
+  _mm_store_si128(&out[9], _mm_and_si128(_mm_srli_epi32(current,4), mask5));
+  _mm_store_si128(&out[10], _mm_and_si128(_mm_srli_epi32(current,2), mask5));
+  _mm_store_si128(&out[11], _mm_and_si128(current, mask5));
+
+  /* Slots 12..15: windows that run off the low end of current.  The low
+     24 bits of current are moved up by 8 and the top 8 bits of next are
+     placed beneath them. */
+  spanning = _mm_or_si128(_mm_slli_epi32(current,8), _mm_srli_epi32(next,24));
+  _mm_store_si128(&out[12], _mm_and_si128(_mm_srli_epi32(spanning,6), mask5));
+  _mm_store_si128(&out[13], _mm_and_si128(_mm_srli_epi32(spanning,4), mask5));
+  _mm_store_si128(&out[14], _mm_and_si128(_mm_srli_epi32(spanning,2), mask5));
+  _mm_store_si128(&out[15], _mm_and_si128(spanning, mask5));
+
+  return;
+}
+
+/* Increments counts[] once for every window value found in current and
+   next: sixteen windows per 32-bit lane, four lanes, so 64 increments per
+   call.  Windows 0..11 come from current alone; windows 12..15 come from
+   current joined with the top byte of next.  mask5 is defined elsewhere
+   in this file (presumably the low 10 bits -- TODO confirm).  The numbers
+   in the trailing comments are the original author's position labels for
+   the four lanes.
+
+   The lanes are spilled to a small stack array so they can be used as
+   scalar indices.  That array is a plain automatic array, which is only
+   guaranteed the alignment of Genomecomp_T, whereas _mm_store_si128
+   requires a 16-byte-aligned address.  The unaligned store
+   _mm_storeu_si128 is therefore used; it produces the same bytes.
+   NOTE(review): assumes Genomecomp_T is 32 bits wide so that array[4]
+   exactly holds one __m128i -- confirm against its typedef. */
+static void
+count_5mers_fwd_simd (Count_T *counts, __m128i current, __m128i next) {
+  __m128i oligo;
+  Genomecomp_T array[4];
+
+  _mm_storeu_si128((__m128i *) array, _mm_srli_epi32(current,22)); /* No mask necessary */
+  counts[array[0]] += 1;	/* 0 */
+  counts[array[1]] += 1;	/* 16 */
+  counts[array[2]] += 1;	/* 32 */
+  counts[array[3]] += 1;	/* 48 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
+  counts[array[0]] += 1;	/* 1 */
+  counts[array[1]] += 1;	/* 17 */
+  counts[array[2]] += 1;	/* 33 */
+  counts[array[3]] += 1;	/* 49 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
+  counts[array[0]] += 1;	/* 2 */
+  counts[array[1]] += 1;	/* 18 */
+  counts[array[2]] += 1;	/* 34 */
+  counts[array[3]] += 1;	/* 50 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
+  counts[array[0]] += 1;	/* 3 */
+  counts[array[1]] += 1;	/* 19 */
+  counts[array[2]] += 1;	/* 35 */
+  counts[array[3]] += 1;	/* 51 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
+  counts[array[0]] += 1;	/* 4 */
+  counts[array[1]] += 1;	/* 20 */
+  counts[array[2]] += 1;	/* 36 */
+  counts[array[3]] += 1;	/* 52 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
+  counts[array[0]] += 1;	/* 5 */
+  counts[array[1]] += 1;	/* 21 */
+  counts[array[2]] += 1;	/* 37 */
+  counts[array[3]] += 1;	/* 53 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
+  counts[array[0]] += 1;	/* 6 */
+  counts[array[1]] += 1;	/* 22 */
+  counts[array[2]] += 1;	/* 38 */
+  counts[array[3]] += 1;	/* 54 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
+  counts[array[0]] += 1;	/* 7 */
+  counts[array[1]] += 1;	/* 23 */
+  counts[array[2]] += 1;	/* 39 */
+  counts[array[3]] += 1;	/* 55 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
+  counts[array[0]] += 1;	/* 8 */
+  counts[array[1]] += 1;	/* 24 */
+  counts[array[2]] += 1;	/* 40 */
+  counts[array[3]] += 1;	/* 56 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
+  counts[array[0]] += 1;	/* 9 */
+  counts[array[1]] += 1;	/* 25 */
+  counts[array[2]] += 1;	/* 41 */
+  counts[array[3]] += 1;	/* 57 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
+  counts[array[0]] += 1;	/* 10 */
+  counts[array[1]] += 1;	/* 26 */
+  counts[array[2]] += 1;	/* 42 */
+  counts[array[3]] += 1;	/* 58 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( current, mask5));
+  counts[array[0]] += 1;	/* 11 */
+  counts[array[1]] += 1;	/* 27 */
+  counts[array[2]] += 1;	/* 43 */
+  counts[array[3]] += 1;	/* 59 */
+
+
+  /* Remaining windows run off the low end of current: shift its low 24
+     bits up by 8 and place the top 8 bits of next beneath them. */
+  oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8));
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
+  counts[array[0]] += 1;	/* 12 */
+  counts[array[1]] += 1;	/* 28 */
+  counts[array[2]] += 1;	/* 44 */
+  counts[array[3]] += 1;	/* 60 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
+  counts[array[0]] += 1;	/* 13 */
+  counts[array[1]] += 1;	/* 29 */
+  counts[array[2]] += 1;	/* 45 */
+  counts[array[3]] += 1;	/* 61 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
+  counts[array[0]] += 1;	/* 14 */
+  counts[array[1]] += 1;	/* 30 */
+  counts[array[2]] += 1;	/* 46 */
+  counts[array[3]] += 1;	/* 62 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( oligo, mask5));
+  counts[array[0]] += 1;	/* 15 */
+  counts[array[1]] += 1;	/* 31 */
+  counts[array[2]] += 1;	/* 47 */
+  counts[array[3]] += 1;	/* 63 */
+
+  return;
+}
+
+#endif
+
+
+/* Visits the 32 windows of one block, from the one labelled 31 (stored as
+   chrpos) down to the one labelled 0 (stored as chrpos - 31), and returns
+   chrpos - 32.
+
+   For each window value k with counts[k] != 0:
+     - if pointers[k] has already reached positions[k], counts[k] is
+       reset to 0 and nothing is stored;
+     - otherwise pointers[k] is pre-decremented and the position is
+       written through it.
+
+   The windows are taken from high_rev, low_rev and the top byte of
+   nexthigh_rev.  With INDIVIDUAL_SHIFTS defined each window is produced
+   by a scalar shift and MASK5; otherwise four windows at a time are
+   produced with SSE and mask5.  MASK5 and mask5 are defined elsewhere in
+   this file. */
+static int
+store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T kmer, joined;
+#ifndef INDIVIDUAL_SHIFTS
+  __m128i lanes, kmers;
+#endif
+
+/* Store (chrpos - back) for the window currently held in kmer, or zero
+   its count when its pointer has already reached positions[kmer]. */
+#define STORE_5MER_AT(back)				\
+  do {							\
+    if (counts[kmer]) {					\
+      if (pointers[kmer] == positions[kmer]) {		\
+	counts[kmer] = 0;				\
+      } else {						\
+	*(--pointers[kmer]) = chrpos - (back);		\
+      }							\
+    }							\
+  } while (0)
+
+#ifndef INDIVIDUAL_SHIFTS
+/* Handle the four lanes of kmers in lane order, using offsets
+   base .. base+3. */
+#define STORE_4_LANES(base)						\
+  do {									\
+    kmer = _mm_extract_epi32(kmers,0);  STORE_5MER_AT((base));		\
+    kmer = _mm_extract_epi32(kmers,1);  STORE_5MER_AT((base) + 1);	\
+    kmer = _mm_extract_epi32(kmers,2);  STORE_5MER_AT((base) + 2);	\
+    kmer = _mm_extract_epi32(kmers,3);  STORE_5MER_AT((base) + 3);	\
+  } while (0)
+#endif
+
+
+  /* Windows 31..28 (offsets 0..3): low 24 bits of low_rev above the top
+     byte of nexthigh_rev */
+  joined = (nexthigh_rev >> 24) | (low_rev << 8);
+
+#ifdef INDIVIDUAL_SHIFTS
+  kmer = joined & MASK5;		STORE_5MER_AT(0);	/* 31 */
+  kmer = (joined >> 2) & MASK5;		STORE_5MER_AT(1);	/* 30 */
+  kmer = (joined >> 4) & MASK5;		STORE_5MER_AT(2);	/* 29 */
+  kmer = (joined >> 6) & MASK5;		STORE_5MER_AT(3);	/* 28 */
+#else
+  lanes = _mm_setr_epi32(joined, joined >> 2, joined >> 4, joined >> 6);
+  kmers = _mm_and_si128(lanes, mask5);
+  STORE_4_LANES(0);
+#endif
+
+
+  /* Windows 27..16 (offsets 4..15): entirely inside low_rev */
+#ifdef INDIVIDUAL_SHIFTS
+  kmer = low_rev & MASK5;		STORE_5MER_AT(4);	/* 27 */
+  kmer = (low_rev >> 2) & MASK5;	STORE_5MER_AT(5);	/* 26 */
+  kmer = (low_rev >> 4) & MASK5;	STORE_5MER_AT(6);	/* 25 */
+  kmer = (low_rev >> 6) & MASK5;	STORE_5MER_AT(7);	/* 24 */
+  kmer = (low_rev >> 8) & MASK5;	STORE_5MER_AT(8);	/* 23 */
+  kmer = (low_rev >> 10) & MASK5;	STORE_5MER_AT(9);	/* 22 */
+  kmer = (low_rev >> 12) & MASK5;	STORE_5MER_AT(10);	/* 21 */
+  kmer = (low_rev >> 14) & MASK5;	STORE_5MER_AT(11);	/* 20 */
+  kmer = (low_rev >> 16) & MASK5;	STORE_5MER_AT(12);	/* 19 */
+  kmer = (low_rev >> 18) & MASK5;	STORE_5MER_AT(13);	/* 18 */
+  kmer = (low_rev >> 20) & MASK5;	STORE_5MER_AT(14);	/* 17 */
+  kmer = low_rev >> 22;			STORE_5MER_AT(15);	/* 16, No mask necessary */
+#else
+  lanes = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+  kmers = _mm_and_si128(lanes, mask5);
+  STORE_4_LANES(4);
+
+  /* Shifting every lane by 8 more bits yields the next four windows */
+  lanes = _mm_srli_epi32(lanes, 8);
+  kmers = _mm_and_si128(lanes, mask5);
+  STORE_4_LANES(8);
+
+  lanes = _mm_srli_epi32(lanes, 8);
+  kmers = _mm_and_si128(lanes, mask5);
+  STORE_4_LANES(12);
+#endif
+
+
+  /* Windows 15..12 (offsets 16..19): low 24 bits of high_rev above the
+     top byte of low_rev */
+  joined = (low_rev >> 24) | (high_rev << 8);
+
+#ifdef INDIVIDUAL_SHIFTS
+  kmer = joined & MASK5;		STORE_5MER_AT(16);	/* 15 */
+  kmer = (joined >> 2) & MASK5;		STORE_5MER_AT(17);	/* 14 */
+  kmer = (joined >> 4) & MASK5;		STORE_5MER_AT(18);	/* 13 */
+  kmer = (joined >> 6) & MASK5;		STORE_5MER_AT(19);	/* 12 */
+#else
+  lanes = _mm_setr_epi32(joined, joined >> 2, joined >> 4, joined >> 6);
+  kmers = _mm_and_si128(lanes, mask5);
+  STORE_4_LANES(16);
+#endif
+
+
+  /* Windows 11..0 (offsets 20..31): entirely inside high_rev */
+#ifdef INDIVIDUAL_SHIFTS
+  kmer = high_rev & MASK5;		STORE_5MER_AT(20);	/* 11 */
+  kmer = (high_rev >> 2) & MASK5;	STORE_5MER_AT(21);	/* 10 */
+  kmer = (high_rev >> 4) & MASK5;	STORE_5MER_AT(22);	/* 9 */
+  kmer = (high_rev >> 6) & MASK5;	STORE_5MER_AT(23);	/* 8 */
+  kmer = (high_rev >> 8) & MASK5;	STORE_5MER_AT(24);	/* 7 */
+  kmer = (high_rev >> 10) & MASK5;	STORE_5MER_AT(25);	/* 6 */
+  kmer = (high_rev >> 12) & MASK5;	STORE_5MER_AT(26);	/* 5 */
+  kmer = (high_rev >> 14) & MASK5;	STORE_5MER_AT(27);	/* 4 */
+  kmer = (high_rev >> 16) & MASK5;	STORE_5MER_AT(28);	/* 3 */
+  kmer = (high_rev >> 18) & MASK5;	STORE_5MER_AT(29);	/* 2 */
+  kmer = (high_rev >> 20) & MASK5;	STORE_5MER_AT(30);	/* 1 */
+  kmer = high_rev >> 22;		STORE_5MER_AT(31);	/* 0, No mask necessary */
+#else
+  lanes = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+  kmers = _mm_and_si128(lanes, mask5);
+  STORE_4_LANES(20);
+
+  lanes = _mm_srli_epi32(lanes, 8);
+  kmers = _mm_and_si128(lanes, mask5);
+  STORE_4_LANES(24);
+
+  lanes = _mm_srli_epi32(lanes, 8);
+  kmers = _mm_and_si128(lanes, mask5);
+  STORE_4_LANES(28);
+#endif
+
+#ifndef INDIVIDUAL_SHIFTS
+#undef STORE_4_LANES
+#endif
+#undef STORE_5MER_AT
+
+  return chrpos - 32;
+}
+
+
+#ifndef USE_SIMD_FOR_COUNTS
+static void
+count_positions_fwd_std (Count_T *counts, int indexsize, Univcoord_T left, Univcoord_T left_plus_length,
+ int genestrand) {
+ int startdiscard, enddiscard;
+ Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev,
+ low, high, nextlow;
+
+ debug(printf("Starting count_positions_fwd_std\n"));
+
+ left_plus_length -= indexsize;
+
+ startptr = left/32U*3;
+ ptr = endptr = left_plus_length/32U*3;
+ startdiscard = left % 32; /* (left+pos5) % 32 */
+ enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+
+ if (left_plus_length <= left) {
+ /* Skip */
+
+ } else if (startptr == endptr) {
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ high_rev = reverse_nt[low >> 16];
+ high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+ low_rev = reverse_nt[high >> 16];
+ low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+ nexthigh_rev = reverse_nt[nextlow >> 16];
+ nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+ if (indexsize == 9) {
+ count_9mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 8) {
+ count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 7) {
+ count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 6) {
+ count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 5) {
+ count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
+ abort();
+ }
+
+ } else {
+ /* Genome_print_blocks(ref_blocks,left,left+16); */
+
+ /* End block */
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ high_rev = reverse_nt[low >> 16];
+ high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+ low_rev = reverse_nt[high >> 16];
+ low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+ nexthigh_rev = reverse_nt[nextlow >> 16];
+ nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+ if (indexsize == 9) {
+ count_9mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 8) {
+ count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 7) {
+ count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 6) {
+ count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 5) {
+ count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else {
+ abort();
+ }
+
+ /* Middle blocks */
+ if (indexsize == 9) {
+ while (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ high_rev = reverse_nt[low >> 16];
+ high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+ low_rev = reverse_nt[high >> 16];
+ low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+ nexthigh_rev = reverse_nt[nextlow >> 16];
+ nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+ count_9mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 8) {
+ while (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ high_rev = reverse_nt[low >> 16];
+ high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+ low_rev = reverse_nt[high >> 16];
+ low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+ nexthigh_rev = reverse_nt[nextlow >> 16];
+ nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+ count_8mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 7) {
+ while (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ high_rev = reverse_nt[low >> 16];
+ high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+ low_rev = reverse_nt[high >> 16];
+ low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+ nexthigh_rev = reverse_nt[nextlow >> 16];
+ nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+ count_7mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 6) {
+ while (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ high_rev = reverse_nt[low >> 16];
+ high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+ low_rev = reverse_nt[high >> 16];
+ low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+ nexthigh_rev = reverse_nt[nextlow >> 16];
+ nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+ count_6mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 5) {
+ while (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ high_rev = reverse_nt[low >> 16];
+ high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+ low_rev = reverse_nt[high >> 16];
+ low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+ nexthigh_rev = reverse_nt[nextlow >> 16];
+ nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+ count_5mers_fwd(counts,high_rev,low_rev,nexthigh_rev);
+ }
+
+ } else {
+ abort();
+ }
+
+ ptr -= 3;
+
+ /* Start block */
+ assert(ptr == startptr);
+
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ high_rev = reverse_nt[low >> 16];
+ high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+ low_rev = reverse_nt[high >> 16];
+ low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+ nexthigh_rev = reverse_nt[nextlow >> 16];
+ nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+ if (indexsize == 9) {
+ count_9mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 8) {
+ count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 7) {
+ count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 6) {
+ count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 5) {
+ count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
+ abort();
+ }
+
+ }
+
+ return;
+}
+#endif
+
+
+
+#ifdef USE_SIMD_FOR_COUNTS
+static void
+count_positions_fwd_simd (Count_T *counts, int indexsize,
+ Univcoord_T left, Univcoord_T left_plus_length, int genestrand) {
+ int startdiscard, enddiscard;
+ Genomecomp_T ptr, startptr, endptr, nexthigh_rev, nextlow;
+ Genomecomp_T high0_rev, low0_rev, low0, high0, low1, high1;
+ __m128i current, next, mask2, mask4;
+ /* __m128i array[16]; */
+#ifdef HAVE_SSSE3
+ __m128i reverse8;
+#else
+ __m128i mask8;
+#endif
+#ifdef HAVE_SSE4_1
+ __m128i temp;
+#else
+ Genomecomp_T high1_rev, low1_rev;
+#endif
+
+
+ debug(printf("Starting count_positions_fwd_simd\n"));
+
+ left_plus_length -= indexsize;
+
+ startptr = left/32U*3;
+ ptr = endptr = left_plus_length/32U*3;
+ startdiscard = left % 32; /* (left+pos5) % 32 */
+ enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+
+ mask2 = _mm_set1_epi32(0x33333333);
+ mask4 = _mm_set1_epi32(0x0F0F0F0F);
+#ifdef HAVE_SSSE3
+ reverse8 = _mm_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03);
+#else
+ mask8 = _mm_set1_epi32(0x00FF00FF);
+#endif
+
+ if (left_plus_length <= left) {
+ /* Skip */
+
+ } else if (startptr == endptr) {
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ current = _mm_set_epi32(0,nextlow,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(nexthigh_rev == (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16));
+
+ if (indexsize == 9) {
+ count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 8) {
+ count_8mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 7) {
+ count_7mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 6) {
+ count_6mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 5) {
+ count_5mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
+ abort();
+ }
+
+ } else {
+ /* Genome_print_blocks(ref_blocks,left,left+16); */
+
+ /* End block */
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ current = _mm_set_epi32(0,nextlow,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(nexthigh_rev == (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16));
+
+ if (indexsize == 9) {
+ count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 8) {
+ count_8mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 7) {
+ count_7mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 6) {
+ count_6mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 5) {
+ count_5mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else {
+ abort();
+ }
+
+ /* Middle blocks */
+ if (indexsize == 9) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+#if 0
+ extract_9mers_fwd_simd(array,current,next);
+ count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_9mers_fwd_simd(counts,current,next);
+#endif
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ count_9mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 8) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+#if 0
+ extract_8mers_fwd_simd(array,current,next);
+ count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_8mers_fwd_simd(counts,current,next);
+#endif
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ count_8mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 7) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+#if 0
+ extract_7mers_fwd_simd(array,current,next);
+ count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_7mers_fwd_simd(counts,current,next);
+#endif
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ count_7mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 6) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+#if 0
+ extract_6mers_fwd_simd(array,current,next);
+ count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_6mers_fwd_simd(counts,current,next);
+#endif
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ count_6mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 5) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+#if 0
+ extract_5mers_fwd_simd(array,current,next);
+ count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_5mers_fwd_simd(counts,current,next);
+#endif
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ count_5mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else {
+ abort();
+ }
+
+ ptr -= 3;
+
+ /* Start block */
+ assert(ptr == startptr);
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ if (indexsize == 9) {
+ count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 8) {
+ count_8mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 7) {
+ count_7mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 6) {
+ count_6mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 5) {
+ count_5mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
+ abort();
+ }
+ }
+
+ return;
+}
+#endif
+
+
+#ifndef USE_SIMD_FOR_COUNTS
+/* Loads one genome block for the non-SIMD store path.  Reads the high word
+   at ref_blocks[ptr], the low word at ref_blocks[ptr+1], and the low word of
+   the following block at ref_blocks[ptr+4] (converted with
+   Bigendian_convert_uint when WORDS_BIGENDIAN is defined), applies the Cmet
+   reduction selected by the file-level mode and by genestrand, and writes
+   the reverse_nt-reversed words to the three output pointers.  The reversal
+   exchanges roles: *high_rev is built from the low word and *low_rev from
+   the high word. */
+static void
+load_rev_block_for_store_std (Genomecomp_T *high_rev, Genomecomp_T *low_rev, Genomecomp_T *nexthigh_rev,
+                              Genomecomp_T ptr, int genestrand) {
+  Genomecomp_T low, high, nextlow;
+
+#ifdef WORDS_BIGENDIAN
+  high = Bigendian_convert_uint(ref_blocks[ptr]);
+  low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+  nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+  high = ref_blocks[ptr];
+  low = ref_blocks[ptr+1];
+  nextlow = ref_blocks[ptr+4];
+#endif
+
+  /* Any other mode leaves the words unreduced */
+  if (mode == CMET_STRANDED) {
+    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+  } else if (mode == CMET_NONSTRANDED) {
+    if (genestrand > 0) {
+      high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+    } else {
+      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+    }
+  }
+
+  /* reverse_nt is looked up per 16-bit half, and the halves are swapped */
+  *high_rev = reverse_nt[low >> 16];
+  *high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+  *low_rev = reverse_nt[high >> 16];
+  *low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+  *nexthigh_rev = reverse_nt[nextlow >> 16];
+  *nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+  return;
+}
+
+
+/* Handles one partially used block by forwarding to the
+   store_<k>mers_fwd_partial routine that matches indexsize (5 through 9),
+   and returns the chrpos value that routine returns.  Any other indexsize
+   prints a message to stderr and aborts. */
+static Chrpos_T
+store_kmers_fwd_partial_dispatch (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions,
+                                  Count_T *counts, int indexsize,
+                                  Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
+                                  int startdiscard, int enddiscard) {
+  switch (indexsize) {
+  case 9:
+    return store_9mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+  case 8:
+    return store_8mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+  case 7:
+    return store_7mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+  case 6:
+    return store_6mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+  case 5:
+    return store_5mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+  default:
+    fprintf(stderr,"indexsize %d not supported\n",indexsize);
+    abort();
+  }
+
+  return chrpos;		/* Not reached */
+}
+
+
+/* Non-SIMD store pass over the region from left to left_plus_length.
+   After left_plus_length is reduced by indexsize, the blocks are visited
+   from the rightmost (end) block down to the leftmost (start) block, three
+   words of ref_blocks per block.  The end and start blocks go through the
+   store_<k>mers_fwd_partial routines with enddiscard and startdiscard
+   respectively, and whole blocks in between go through store_<k>mers_fwd.
+   chrpos is advanced to the right end first and then threaded through each
+   call.  Nothing is done when the reduced left_plus_length is not greater
+   than left.  An indexsize outside 5..9 prints a message and aborts. */
+static void
+store_positions_fwd_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts, int indexsize,
+                         Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
+                         int genestrand) {
+  int startdiscard, enddiscard;
+  Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev;
+
+
+  /* NOTE(review): assumes Univcoord_T is an unsigned type, in which case the
+     subtraction below would wrap for a region shorter than indexsize and
+     defeat the skip test -- confirm against its typedef.  The guard does
+     not change the outcome for a signed type. */
+  if (left_plus_length < (Univcoord_T) indexsize) {
+    return;
+  }
+
+  left_plus_length -= indexsize;
+  chrpos += (left_plus_length - left); /* We are starting from the right */
+
+  startptr = left/32U*3;
+  ptr = endptr = left_plus_length/32U*3;
+  startdiscard = left % 32; /* (left+pos5) % 32 */
+  enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+
+  if (left_plus_length <= left) {
+    /* Skip */
+
+  } else if (startptr == endptr) {
+    /* Region lies within a single block */
+    load_rev_block_for_store_std(&high_rev,&low_rev,&nexthigh_rev,ptr,genestrand);
+    chrpos = store_kmers_fwd_partial_dispatch(chrpos,pointers,positions,counts,indexsize,
+                                              high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+
+  } else {
+    /* Genome_print_blocks(ref_blocks,left,left+16); */
+
+    /* End block */
+    load_rev_block_for_store_std(&high_rev,&low_rev,&nexthigh_rev,ptr,genestrand);
+    chrpos = store_kmers_fwd_partial_dispatch(chrpos,pointers,positions,counts,indexsize,
+                                              high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+
+    /* Middle blocks.  The indexsize test stays outside the loops so that
+       each loop body calls a single fixed routine. */
+    if (indexsize == 9) {
+      while (ptr > startptr + 3) {
+        ptr -= 3;
+        load_rev_block_for_store_std(&high_rev,&low_rev,&nexthigh_rev,ptr,genestrand);
+        chrpos = store_9mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
+      }
+
+    } else if (indexsize == 8) {
+      while (ptr > startptr + 3) {
+        ptr -= 3;
+        load_rev_block_for_store_std(&high_rev,&low_rev,&nexthigh_rev,ptr,genestrand);
+        chrpos = store_8mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
+      }
+
+    } else if (indexsize == 7) {
+      while (ptr > startptr + 3) {
+        ptr -= 3;
+        load_rev_block_for_store_std(&high_rev,&low_rev,&nexthigh_rev,ptr,genestrand);
+        chrpos = store_7mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
+      }
+
+    } else if (indexsize == 6) {
+      while (ptr > startptr + 3) {
+        ptr -= 3;
+        load_rev_block_for_store_std(&high_rev,&low_rev,&nexthigh_rev,ptr,genestrand);
+        chrpos = store_6mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
+      }
+
+    } else if (indexsize == 5) {
+      while (ptr > startptr + 3) {
+        ptr -= 3;
+        load_rev_block_for_store_std(&high_rev,&low_rev,&nexthigh_rev,ptr,genestrand);
+        chrpos = store_5mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
+      }
+
+    } else {
+      /* The end-block dispatch above has already aborted for this case */
+      fprintf(stderr,"indexsize %d not supported\n",indexsize);
+      abort();
+    }
+
+    ptr -= 3;
+
+    /* Start block */
+    assert(ptr == startptr);
+
+    load_rev_block_for_store_std(&high_rev,&low_rev,&nexthigh_rev,ptr,genestrand);
+    chrpos = store_kmers_fwd_partial_dispatch(chrpos,pointers,positions,counts,indexsize,
+                                              high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+  }
+
+  return;
+}
+#endif
+
+
+#ifdef USE_SIMD_FOR_COUNTS
+static void
+store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts, int indexsize,
+ Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
+ int genestrand) {
+ int startdiscard, enddiscard;
+ Genomecomp_T ptr, startptr, endptr, nexthigh_rev, nextlow;
+ Genomecomp_T high0_rev, low0_rev, low0, high0, low1, high1;
+ __m128i current, next, mask2, mask4;
+ __m128i array[16];
+#ifdef HAVE_SSSE3
+ __m128i reverse8;
+#else
+ __m128i mask8;
+#endif
+#ifdef HAVE_SSE4_1
+ __m128i temp;
+#else
+ Genomecomp_T high1_rev, low1_rev;
+#endif
+
+
+ debug(printf("Starting store_positions_fwd_simd\n"));
+
+ left_plus_length -= indexsize;
+ chrpos += (left_plus_length - left); /* We are starting from the right */
+
+ startptr = left/32U*3;
+ ptr = endptr = left_plus_length/32U*3;
+ startdiscard = left % 32; /* (left+pos5) % 32 */
+ enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+
+ mask2 = _mm_set1_epi32(0x33333333);
+ mask4 = _mm_set1_epi32(0x0F0F0F0F);
+#ifdef HAVE_SSSE3
+ reverse8 = _mm_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03);
+#else
+ mask8 = _mm_set1_epi32(0x00FF00FF);
+#endif
+
+ if (left_plus_length <= left) {
+ /* Skip */
+
+ } else if (startptr == endptr) {
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ current = _mm_set_epi32(0,nextlow,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(nexthigh_rev == (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16));
+
+ if (indexsize == 9) {
+ chrpos = store_9mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 8) {
+ chrpos = store_8mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 7) {
+ chrpos = store_7mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 6) {
+ chrpos = store_6mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else if (indexsize == 5) {
+ chrpos = store_5mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+ } else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
+ abort();
+ }
+
+ } else {
+ /* Genome_print_blocks(ref_blocks,left,left+16); */
+
+ /* End block */
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ current = _mm_set_epi32(0,nextlow,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(nexthigh_rev == (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16));
+
+ if (indexsize == 9) {
+ chrpos = store_9mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 8) {
+ chrpos = store_8mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 7) {
+ chrpos = store_7mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 6) {
+ chrpos = store_6mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 5) {
+ chrpos = store_5mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ } else {
+ abort();
+ }
+
+ /* Middle blocks */
+ if (indexsize == 9) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+ extract_9mers_fwd_simd(array,current,next);
+ chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ chrpos = store_9mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 8) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+ extract_8mers_fwd_simd(array,current,next);
+ chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ chrpos = store_8mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 7) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+ extract_7mers_fwd_simd(array,current,next);
+ chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ chrpos = store_7mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 6) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+ extract_6mers_fwd_simd(array,current,next);
+ chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ chrpos = store_6mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else if (indexsize == 5) {
+ while (ptr > startptr + 6) {
+ ptr -= 6;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ /* nextlow = ref_blocks[ptr+7]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(high1,low1,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+ next = _mm_shuffle_epi32(temp,0x39);
+#else
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+ high1_rev = (unsigned int) _mm_extract_epi32(current,2);
+ assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
+ low1_rev = (unsigned int) _mm_extract_epi32(current,3);
+ assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+
+ next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+ extract_5mers_fwd_simd(array,current,next);
+ chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ }
+
+ if (ptr > startptr + 3) {
+ ptr -= 3;
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ chrpos = store_5mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+ }
+
+ } else {
+ abort();
+ }
+
+ ptr -= 3;
+
+ /* Start block */
+ assert(ptr == startptr);
+
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+#else
+ high0 = ref_blocks[ptr];
+ low0 = ref_blocks[ptr+1];
+ /* nextlow = ref_blocks[ptr+4]; */
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+ } else {
+ high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+ }
+ }
+
+ current = _mm_set_epi32(0,0,high0,low0);
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+#ifdef HAVE_SSSE3
+ current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+#else
+ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+#endif
+
+ nexthigh_rev = high0_rev;
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+ assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+
+ if (indexsize == 9) {
+ chrpos = store_9mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 8) {
+ chrpos = store_8mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 7) {
+ chrpos = store_7mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 6) {
+ chrpos = store_6mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 5) {
+ chrpos = store_5mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+ } else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
+ abort();
+ }
+ }
+
+ return;
+}
+#endif
+
+
+/************************************************************************
+ * REV
+ ************************************************************************/
+
+/* Increments counts[] for the MASK9-masked oligo at every position from
+   startdiscard through enddiscard (inclusive).  Each position shifts the
+   word triple low_rc / high_rc / nextlow_rc by a further two bits. */
+static void
+count_9mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 7) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 23) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK9;
+
+    counts[oligo] += 1;
+    debug(printf("%d %04X => %d\n",i,oligo,counts[oligo]));
+  }
+
+  return;
+}
+
+/* For every position from startdiscard through enddiscard (inclusive),
+   extracts the MASK9-masked oligo and, if its count is nonzero, writes
+   chrpos into the slot just below pointers[oligo].  When pointers[oligo]
+   has already reached positions[oligo], the count is zeroed instead.
+   chrpos is decremented once per position whether or not it was stored.
+   Returns the final chrpos (declared int although chrpos is Chrpos_T). */
+static int
+store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+                         Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 7) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 23) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK9;
+
+    if (counts[oligo]) {
+      if (pointers[oligo] != positions[oligo]) {
+        *(--pointers[oligo]) = chrpos;
+      } else {
+        /* Pointer has reached the start of its positions list */
+        counts[oligo] = 0;
+      }
+    }
+    chrpos--;
+  }
+
+  return chrpos;
+}
+
+
+/* Increments counts[] for the MASK8-masked oligo at every position from
+   startdiscard through enddiscard (inclusive).  Each position shifts the
+   word triple low_rc / high_rc / nextlow_rc by a further two bits. */
+static void
+count_8mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 8) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 24) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK8;
+
+    counts[oligo] += 1;
+    debug(printf("%d %04X => %d\n",i,oligo,counts[oligo]));
+  }
+
+  return;
+}
+
+
+/* For every position from startdiscard through enddiscard (inclusive),
+   extracts the MASK8-masked oligo and, if its count is nonzero, writes
+   chrpos into the slot just below pointers[oligo].  When pointers[oligo]
+   has already reached positions[oligo], the count is zeroed instead.
+   chrpos is decremented once per position whether or not it was stored.
+   Returns the final chrpos (declared int although chrpos is Chrpos_T). */
+static int
+store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+                         Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 8) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 24) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK8;
+
+    if (counts[oligo]) {
+      if (pointers[oligo] != positions[oligo]) {
+        *(--pointers[oligo]) = chrpos;
+      } else {
+        /* Pointer has reached the start of its positions list */
+        counts[oligo] = 0;
+      }
+    }
+    chrpos--;
+  }
+
+  return chrpos;
+}
+
+
+/* Increments counts[] for the MASK7-masked oligo at every position from
+   startdiscard through enddiscard (inclusive).  Each position shifts the
+   word triple low_rc / high_rc / nextlow_rc by a further two bits. */
+static void
+count_7mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 9) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 25) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK7;
+
+    counts[oligo] += 1;
+    debug(printf("%d %04X => %d\n",i,oligo,counts[oligo]));
+  }
+
+  return;
+}
+
+
+/* For every position from startdiscard through enddiscard (inclusive),
+   extracts the MASK7-masked oligo and, if its count is nonzero, writes
+   chrpos into the slot just below pointers[oligo].  When pointers[oligo]
+   has already reached positions[oligo], the count is zeroed instead.
+   chrpos is decremented once per position whether or not it was stored.
+   Returns the final chrpos (declared int although chrpos is Chrpos_T). */
+static int
+store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+                         Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 9) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 25) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK7;
+
+    if (counts[oligo]) {
+      if (pointers[oligo] != positions[oligo]) {
+        *(--pointers[oligo]) = chrpos;
+      } else {
+        /* Pointer has reached the start of its positions list */
+        counts[oligo] = 0;
+      }
+    }
+    chrpos--;
+  }
+
+  return chrpos;
+}
+
+
+/* Increments counts[] for the MASK6-masked oligo at every position from
+   startdiscard through enddiscard (inclusive).  Each position shifts the
+   word triple low_rc / high_rc / nextlow_rc by a further two bits. */
+static void
+count_6mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 10) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 26) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK6;
+
+    counts[oligo] += 1;
+    debug(printf("%d %04X => %d\n",i,oligo,counts[oligo]));
+  }
+
+  return;
+}
+
+
+/* For every position from startdiscard through enddiscard (inclusive),
+   extracts the MASK6-masked oligo and, if its count is nonzero, writes
+   chrpos into the slot just below pointers[oligo].  When pointers[oligo]
+   has already reached positions[oligo], the count is zeroed instead.
+   chrpos is decremented once per position whether or not it was stored.
+   Returns the final chrpos (declared int although chrpos is Chrpos_T). */
+static int
+store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+                         Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 10) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 26) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK6;
+
+    if (counts[oligo]) {
+      if (pointers[oligo] != positions[oligo]) {
+        *(--pointers[oligo]) = chrpos;
+      } else {
+        /* Pointer has reached the start of its positions list */
+        counts[oligo] = 0;
+      }
+    }
+    chrpos--;
+  }
+
+  return chrpos;
+}
+
+
+/* Increments counts[] for the MASK5-masked oligo at every position from
+   startdiscard through enddiscard (inclusive).  Each position shifts the
+   word triple low_rc / high_rc / nextlow_rc by a further two bits. */
+static void
+count_5mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 11) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 27) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK5;
+
+    counts[oligo] += 1;
+    debug(printf("%d %04X => %d\n",i,oligo,counts[oligo]));
+  }
+
+  return;
+}
+
+
+/* For every position from startdiscard through enddiscard (inclusive),
+   extracts the MASK5-masked oligo and, if its count is nonzero, writes
+   chrpos into the slot just below pointers[oligo].  When pointers[oligo]
+   has already reached positions[oligo], the count is zeroed instead.
+   chrpos is decremented once per position whether or not it was stored.
+   Returns the final chrpos (declared int although chrpos is Chrpos_T). */
+static int
+store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+                         Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
+                         int startdiscard, int enddiscard) {
+  Genomecomp_T oligo;
+  int i;
+
+  for (i = startdiscard; i <= enddiscard; i++) {
+    if (i <= 11) {
+      /* Only low_rc contributes */
+      oligo = low_rc >> 2*i;
+    } else if (i <= 15) {
+      /* Upper bits come from high_rc */
+      oligo = (low_rc >> 2*i) | (high_rc << (32 - 2*i));
+    } else if (i <= 27) {
+      /* Only high_rc contributes */
+      oligo = high_rc >> (2*i - 32);
+    } else {
+      /* Upper bits come from nextlow_rc */
+      oligo = (high_rc >> (2*i - 32)) | (nextlow_rc << (64 - 2*i));
+    }
+    oligo &= MASK5;
+
+    if (counts[oligo]) {
+      if (pointers[oligo] != positions[oligo]) {
+        *(--pointers[oligo]) = chrpos;
+      } else {
+        /* Pointer has reached the start of its positions list */
+        counts[oligo] = 0;
+      }
+    }
+    chrpos--;
+  }
+
+  return chrpos;
+}
+
+
+static void
+count_9mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rc & MASK9; /* 0 */
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 2) & MASK9; /* 1 */
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 4) & MASK9; /* 2 */
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 6) & MASK9; /* 3 */
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 8) & MASK9; /* 4 */
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 10) & MASK9; /* 5 */
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 12) & MASK9; /* 6 */
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = low_rc >> 14; /* 7, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+ oligo = low_rc >> 16; /* For 15..8 */
+ oligo |= high_rc << 16;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK9; /* 8 */
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 2) & MASK9; /* 9 */
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 4) & MASK9; /* 10 */
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 6) & MASK9; /* 11 */
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 8) & MASK9; /* 12 */
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 10) & MASK9; /* 13 */
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 12) & MASK9; /* 14 */
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 14) & MASK9; /* 15 */
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rc & MASK9; /* 16 */
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 2) & MASK9; /* 17 */
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 4) & MASK9; /* 18 */
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 6) & MASK9; /* 19 */
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 8) & MASK9; /* 20 */
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 10) & MASK9; /* 21 */
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 12) & MASK9; /* 22 */
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
+
+ masked = high_rc >> 14; /* 23, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+ oligo = high_rc >> 16; /* For 31..24 */
+ oligo |= nextlow_rc << 16;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK9; /* 24 */
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 2) & MASK9; /* 25 */
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 4) & MASK9; /* 26 */
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 6) & MASK9; /* 27 */
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 8) & MASK9; /* 28 */
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 10) & MASK9; /* 29 */
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 12) & MASK9; /* 30 */
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 14) & MASK9; /* 31 */
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask9);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+#endif
+
+ return;
+}
+
+/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
+ and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
+#ifdef USE_SIMD_FOR_COUNTS
+/* Writes 16 vectors of masked 9-mer codes to out[0..15], using aligned
+   stores (so out must be 16-byte aligned and have room for 16 __m128i).
+   out[0..7] come from the per-lane window that straddles current and next,
+   at shifts 14,12,...,0; out[8..15] come from current alone, at the same
+   shifts.  mask9 is defined elsewhere in this file -- presumably the low
+   18 bits of every 32-bit lane (TODO confirm). */
+static void
+extract_9mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
+  /* Upper half of each lane of current joined with the lower half of the
+     corresponding lane of next */
+  const __m128i straddle = _mm_or_si128(_mm_slli_epi32(next,16), _mm_srli_epi32(current,16));
+
+  _mm_store_si128(&out[0], _mm_and_si128(_mm_srli_epi32(straddle,14), mask9));
+  _mm_store_si128(&out[1], _mm_and_si128(_mm_srli_epi32(straddle,12), mask9));
+  _mm_store_si128(&out[2], _mm_and_si128(_mm_srli_epi32(straddle,10), mask9));
+  _mm_store_si128(&out[3], _mm_and_si128(_mm_srli_epi32(straddle,8), mask9));
+  _mm_store_si128(&out[4], _mm_and_si128(_mm_srli_epi32(straddle,6), mask9));
+  _mm_store_si128(&out[5], _mm_and_si128(_mm_srli_epi32(straddle,4), mask9));
+  _mm_store_si128(&out[6], _mm_and_si128(_mm_srli_epi32(straddle,2), mask9));
+  _mm_store_si128(&out[7], _mm_and_si128(straddle, mask9));
+
+  /* A logical right shift by 14 leaves only 18 bits per lane, so the
+     original code applies no mask to this one */
+  _mm_store_si128(&out[8], _mm_srli_epi32(current,14));
+  _mm_store_si128(&out[9], _mm_and_si128(_mm_srli_epi32(current,12), mask9));
+  _mm_store_si128(&out[10], _mm_and_si128(_mm_srli_epi32(current,10), mask9));
+  _mm_store_si128(&out[11], _mm_and_si128(_mm_srli_epi32(current,8), mask9));
+  _mm_store_si128(&out[12], _mm_and_si128(_mm_srli_epi32(current,6), mask9));
+  _mm_store_si128(&out[13], _mm_and_si128(_mm_srli_epi32(current,4), mask9));
+  _mm_store_si128(&out[14], _mm_and_si128(_mm_srli_epi32(current,2), mask9));
+  _mm_store_si128(&out[15], _mm_and_si128(current, mask9));
+
+  return;
+}
+
+/* Increments counts[] once for every 9-mer code obtained from one
+   current/next vector pair: 16 shift/mask combinations times 4 lanes = 64
+   increments.  The first eight groups use the per-lane window that
+   straddles current and next; the last eight use current alone.  The
+   trailing number on each line is the position label carried over from the
+   original code.
+
+   The lane values are spilled to a plain Genomecomp_T[4].  C only
+   guarantees element alignment for that array, so the spill uses the
+   unaligned store _mm_storeu_si128; the aligned _mm_store_si128 requires a
+   16-byte-aligned address and may fault otherwise.
+
+   mask9 is defined elsewhere in this file -- presumably the low 18 bits of
+   every 32-bit lane (TODO confirm). */
+static void
+count_9mers_rev_simd (Count_T *counts, __m128i current, __m128i next) {
+  __m128i oligo;
+  Genomecomp_T array[4];
+
+  oligo = _mm_or_si128( _mm_srli_epi32(current,16), _mm_slli_epi32(next,16));
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9));
+  counts[array[0]] += 1;	/* 63 */
+  counts[array[1]] += 1;	/* 47 */
+  counts[array[2]] += 1;	/* 31 */
+  counts[array[3]] += 1;	/* 15 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9));
+  counts[array[0]] += 1;	/* 62 */
+  counts[array[1]] += 1;	/* 46 */
+  counts[array[2]] += 1;	/* 30 */
+  counts[array[3]] += 1;	/* 14 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9));
+  counts[array[0]] += 1;	/* 61 */
+  counts[array[1]] += 1;	/* 45 */
+  counts[array[2]] += 1;	/* 29 */
+  counts[array[3]] += 1;	/* 13 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9));
+  counts[array[0]] += 1;	/* 60 */
+  counts[array[1]] += 1;	/* 44 */
+  counts[array[2]] += 1;	/* 28 */
+  counts[array[3]] += 1;	/* 12 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9));
+  counts[array[0]] += 1;	/* 59 */
+  counts[array[1]] += 1;	/* 43 */
+  counts[array[2]] += 1;	/* 27 */
+  counts[array[3]] += 1;	/* 11 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9));
+  counts[array[0]] += 1;	/* 58 */
+  counts[array[1]] += 1;	/* 42 */
+  counts[array[2]] += 1;	/* 26 */
+  counts[array[3]] += 1;	/* 10 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9));
+  counts[array[0]] += 1;	/* 57 */
+  counts[array[1]] += 1;	/* 41 */
+  counts[array[2]] += 1;	/* 25 */
+  counts[array[3]] += 1;	/* 9 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( oligo, mask9));
+  counts[array[0]] += 1;	/* 56 */
+  counts[array[1]] += 1;	/* 40 */
+  counts[array[2]] += 1;	/* 24 */
+  counts[array[3]] += 1;	/* 8 */
+
+
+  /* A logical right shift by 14 leaves only 18 bits per lane, so no mask
+     is applied here */
+  _mm_storeu_si128((__m128i *) array, _mm_srli_epi32(current,14));
+  counts[array[0]] += 1;	/* 55 */
+  counts[array[1]] += 1;	/* 39 */
+  counts[array[2]] += 1;	/* 23 */
+  counts[array[3]] += 1;	/* 7 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask9));
+  counts[array[0]] += 1;	/* 54 */
+  counts[array[1]] += 1;	/* 38 */
+  counts[array[2]] += 1;	/* 22 */
+  counts[array[3]] += 1;	/* 6 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask9));
+  counts[array[0]] += 1;	/* 53 */
+  counts[array[1]] += 1;	/* 37 */
+  counts[array[2]] += 1;	/* 21 */
+  counts[array[3]] += 1;	/* 5 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask9));
+  counts[array[0]] += 1;	/* 52 */
+  counts[array[1]] += 1;	/* 36 */
+  counts[array[2]] += 1;	/* 20 */
+  counts[array[3]] += 1;	/* 4 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask9));
+  counts[array[0]] += 1;	/* 51 */
+  counts[array[1]] += 1;	/* 35 */
+  counts[array[2]] += 1;	/* 19 */
+  counts[array[3]] += 1;	/* 3 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask9));
+  counts[array[0]] += 1;	/* 50 */
+  counts[array[1]] += 1;	/* 34 */
+  counts[array[2]] += 1;	/* 18 */
+  counts[array[3]] += 1;	/* 2 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask9));
+  counts[array[0]] += 1;	/* 49 */
+  counts[array[1]] += 1;	/* 33 */
+  counts[array[2]] += 1;	/* 17 */
+  counts[array[3]] += 1;	/* 1 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( current, mask9));
+  counts[array[0]] += 1;	/* 48 */
+  counts[array[1]] += 1;	/* 32 */
+  counts[array[2]] += 1;	/* 16 */
+  counts[array[3]] += 1;	/* 0 */
+
+  return;
+}
+#endif
+
+
+/* Helper for store_9mers_rev: handles a single 9-mer code.
+     - counts[masked] == 0: the code is not being tracked; do nothing.
+     - pointers[masked] == positions[masked]: the write cursor has already
+       reached the start of this code's position list, so nothing more can
+       be written; counts[masked] is cleared instead (presumably so callers
+       treat the code as overabundant -- TODO confirm).
+     - otherwise: the cursor is pre-decremented and position is written
+       there, i.e. each list is filled from its end toward its start. */
+static void
+store_9mers_rev_one (Genomecomp_T masked, Chrpos_T position,
+		     Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts) {
+  if (counts[masked]) {
+    if (pointers[masked] == positions[masked]) {
+      counts[masked] = 0;
+    } else {
+      *(--pointers[masked]) = position;
+    }
+  }
+  return;
+}
+
+#ifndef INDIVIDUAL_SHIFTS
+/* Helper for store_9mers_rev (SSE path): processes lanes 0..3 of _masked,
+   in that order, assigning them position, position - 1, position - 2 and
+   position - 3 respectively. */
+static void
+store_9mers_rev_quad (__m128i _masked, Chrpos_T position,
+		      Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts) {
+  store_9mers_rev_one(_mm_extract_epi32(_masked,0),position,pointers,positions,counts);
+  store_9mers_rev_one(_mm_extract_epi32(_masked,1),position - 1,pointers,positions,counts);
+  store_9mers_rev_one(_mm_extract_epi32(_masked,2),position - 2,pointers,positions,counts);
+  store_9mers_rev_one(_mm_extract_epi32(_masked,3),position - 3,pointers,positions,counts);
+  return;
+}
+#endif
+
+
+/* Stores the positions of the 32 9-mers read from low_rc, high_rc and the
+   low half of nextlow_rc.  The k-th 9-mer (k = 0..31) is extracted at bit
+   offset 2*k of the concatenated words and is assigned position chrpos - k.
+   9-mers are processed strictly in order k = 0, 1, ..., 31, which matters
+   because several of them may share a code and therefore a write cursor.
+
+   Two equivalent extraction paths are kept, selected at compile time:
+   scalar shifts when INDIVIDUAL_SHIFTS is defined, otherwise four lanes at
+   a time with SSE intrinsics.
+
+   Returns chrpos - 32, the position for the next block of 32. */
+static int
+store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+  Genomecomp_T oligo;
+#ifndef INDIVIDUAL_SHIFTS
+  __m128i _oligo;
+#endif
+
+
+  /* 0..7: contained entirely in low_rc */
+#ifdef INDIVIDUAL_SHIFTS
+  store_9mers_rev_one(low_rc & MASK9,chrpos,pointers,positions,counts);	/* 0 */
+  store_9mers_rev_one((low_rc >> 2) & MASK9,chrpos - 1,pointers,positions,counts); /* 1 */
+  store_9mers_rev_one((low_rc >> 4) & MASK9,chrpos - 2,pointers,positions,counts); /* 2 */
+  store_9mers_rev_one((low_rc >> 6) & MASK9,chrpos - 3,pointers,positions,counts); /* 3 */
+  store_9mers_rev_one((low_rc >> 8) & MASK9,chrpos - 4,pointers,positions,counts); /* 4 */
+  store_9mers_rev_one((low_rc >> 10) & MASK9,chrpos - 5,pointers,positions,counts); /* 5 */
+  store_9mers_rev_one((low_rc >> 12) & MASK9,chrpos - 6,pointers,positions,counts); /* 6 */
+  store_9mers_rev_one(low_rc >> 14,chrpos - 7,pointers,positions,counts); /* 7, No mask necessary */
+
+#else
+  _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+  store_9mers_rev_quad(_mm_and_si128(_oligo, mask9),chrpos,pointers,positions,counts); /* 0..3 */
+
+  /* Shifting every lane by a further 8 bits yields shifts 8,10,12,14 */
+  _oligo = _mm_srli_epi32(_oligo, 8);
+  store_9mers_rev_quad(_mm_and_si128(_oligo, mask9),chrpos - 4,pointers,positions,counts); /* 4..7 */
+#endif
+
+
+  /* 8..15: straddle the boundary between low_rc and high_rc */
+  oligo = (low_rc >> 16) | (high_rc << 16);
+
+#ifdef INDIVIDUAL_SHIFTS
+  store_9mers_rev_one(oligo & MASK9,chrpos - 8,pointers,positions,counts); /* 8 */
+  store_9mers_rev_one((oligo >> 2) & MASK9,chrpos - 9,pointers,positions,counts); /* 9 */
+  store_9mers_rev_one((oligo >> 4) & MASK9,chrpos - 10,pointers,positions,counts); /* 10 */
+  store_9mers_rev_one((oligo >> 6) & MASK9,chrpos - 11,pointers,positions,counts); /* 11 */
+  store_9mers_rev_one((oligo >> 8) & MASK9,chrpos - 12,pointers,positions,counts); /* 12 */
+  store_9mers_rev_one((oligo >> 10) & MASK9,chrpos - 13,pointers,positions,counts); /* 13 */
+  store_9mers_rev_one((oligo >> 12) & MASK9,chrpos - 14,pointers,positions,counts); /* 14 */
+  store_9mers_rev_one((oligo >> 14) & MASK9,chrpos - 15,pointers,positions,counts); /* 15 */
+
+#else
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+  store_9mers_rev_quad(_mm_and_si128(_oligo, mask9),chrpos - 8,pointers,positions,counts); /* 8..11 */
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+  store_9mers_rev_quad(_mm_and_si128(_oligo, mask9),chrpos - 12,pointers,positions,counts); /* 12..15 */
+#endif
+
+
+  /* 16..23: contained entirely in high_rc */
+#ifdef INDIVIDUAL_SHIFTS
+  store_9mers_rev_one(high_rc & MASK9,chrpos - 16,pointers,positions,counts); /* 16 */
+  store_9mers_rev_one((high_rc >> 2) & MASK9,chrpos - 17,pointers,positions,counts); /* 17 */
+  store_9mers_rev_one((high_rc >> 4) & MASK9,chrpos - 18,pointers,positions,counts); /* 18 */
+  store_9mers_rev_one((high_rc >> 6) & MASK9,chrpos - 19,pointers,positions,counts); /* 19 */
+  store_9mers_rev_one((high_rc >> 8) & MASK9,chrpos - 20,pointers,positions,counts); /* 20 */
+  store_9mers_rev_one((high_rc >> 10) & MASK9,chrpos - 21,pointers,positions,counts); /* 21 */
+  store_9mers_rev_one((high_rc >> 12) & MASK9,chrpos - 22,pointers,positions,counts); /* 22 */
+  store_9mers_rev_one(high_rc >> 14,chrpos - 23,pointers,positions,counts); /* 23, No mask necessary */
+
+#else
+  _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+  store_9mers_rev_quad(_mm_and_si128(_oligo, mask9),chrpos - 16,pointers,positions,counts); /* 16..19 */
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+  store_9mers_rev_quad(_mm_and_si128(_oligo, mask9),chrpos - 20,pointers,positions,counts); /* 20..23 */
+#endif
+
+
+  /* 24..31: straddle the boundary between high_rc and nextlow_rc */
+  oligo = (high_rc >> 16) | (nextlow_rc << 16);
+
+#ifdef INDIVIDUAL_SHIFTS
+  store_9mers_rev_one(oligo & MASK9,chrpos - 24,pointers,positions,counts); /* 24 */
+  store_9mers_rev_one((oligo >> 2) & MASK9,chrpos - 25,pointers,positions,counts); /* 25 */
+  store_9mers_rev_one((oligo >> 4) & MASK9,chrpos - 26,pointers,positions,counts); /* 26 */
+  store_9mers_rev_one((oligo >> 6) & MASK9,chrpos - 27,pointers,positions,counts); /* 27 */
+  store_9mers_rev_one((oligo >> 8) & MASK9,chrpos - 28,pointers,positions,counts); /* 28 */
+  store_9mers_rev_one((oligo >> 10) & MASK9,chrpos - 29,pointers,positions,counts); /* 29 */
+  store_9mers_rev_one((oligo >> 12) & MASK9,chrpos - 30,pointers,positions,counts); /* 30 */
+  store_9mers_rev_one((oligo >> 14) & MASK9,chrpos - 31,pointers,positions,counts); /* 31 */
+
+#else
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+  store_9mers_rev_quad(_mm_and_si128(_oligo, mask9),chrpos - 24,pointers,positions,counts); /* 24..27 */
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+  store_9mers_rev_quad(_mm_and_si128(_oligo, mask9),chrpos - 28,pointers,positions,counts); /* 28..31 */
+#endif
+
+  return chrpos - 32;
+}
+
+
+static void
+count_8mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rc & MASK8; /* 0 */
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 2) & MASK8; /* 1 */
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 4) & MASK8; /* 2 */
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 6) & MASK8; /* 3 */
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 8) & MASK8; /* 4 */
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 10) & MASK8; /* 5 */
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 12) & MASK8; /* 6 */
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 14) & MASK8; /* 7 */
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+ masked = low_rc >> 16; /* 8, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == (low_rc & MASK8));
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((low_rc >> 2) & MASK8));
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((low_rc >> 4) & MASK8));
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((low_rc >> 6) & MASK8));
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == ((low_rc >> 8) & MASK8));
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((low_rc >> 10) & MASK8));
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((low_rc >> 12) & MASK8));
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((low_rc >> 14) & MASK8));
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+
+ masked = low_rc >> 16; /* 8, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+ oligo = low_rc >> 18; /* For 15..9 */
+ oligo |= high_rc << 14;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK8; /* 9 */
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 2) & MASK8; /* 10 */
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 4) & MASK8; /* 11 */
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 6) & MASK8; /* 12 */
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 8) & MASK8; /* 13 */
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 10) & MASK8; /* 14 */
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 12) & MASK8; /* 15 */
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == (oligo & MASK8));
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((oligo >> 2) & MASK8));
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((oligo >> 4) & MASK8));
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((oligo >> 6) & MASK8));
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == ((oligo >> 8) & MASK8));
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((oligo >> 10) & MASK8));
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((oligo >> 12) & MASK8));
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rc & MASK8; /* 16 */
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 2) & MASK8; /* 17 */
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 4) & MASK8; /* 18 */
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 6) & MASK8; /* 19 */
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 8) & MASK8; /* 20 */
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 10) & MASK8; /* 21 */
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 12) & MASK8; /* 22 */
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 14) & MASK8; /* 23 */
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
+
+ masked = high_rc >> 16; /* 24, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == (high_rc & MASK8));
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((high_rc >> 2) & MASK8));
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((high_rc >> 4) & MASK8));
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((high_rc >> 6) & MASK8));
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == ((high_rc >> 8) & MASK8));
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((high_rc >> 10) & MASK8));
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((high_rc >> 12) & MASK8));
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((high_rc >> 14) & MASK8));
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
+
+
+ masked = high_rc >> 16; /* 24, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+ oligo = high_rc >> 18; /* For 31..25 */
+ oligo |= nextlow_rc << 14;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK8; /* 25 */
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 2) & MASK8; /* 26 */
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 4) & MASK8; /* 27 */
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 6) & MASK8; /* 28 */
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 8) & MASK8; /* 29 */
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 10) & MASK8; /* 30 */
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 12) & MASK8; /* 31 */
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == (oligo & MASK8));
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((oligo >> 2) & MASK8));
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((oligo >> 4) & MASK8));
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((oligo >> 6) & MASK8));
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == ((oligo >> 8) & MASK8));
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((oligo >> 10) & MASK8));
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((oligo >> 12) & MASK8));
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+#endif
+
+ return;
+}
+
+
+/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
+ and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
+#ifdef USE_SIMD_FOR_COUNTS
+/* Writes sixteen vectors of 8-mer codes into out[0] through out[15].
+   Slots 0..6 are taken from the word built by OR-ing (current >> 18)
+   with (next << 14), shifted right by 12, 10, 8, 6, 4, 2 and 0 bits.
+   Slots 7..15 are taken from current, shifted right by 16, 14, ..., 2
+   and 0 bits.  Every vector except the one shifted by 16 is ANDed with
+   mask8.  _mm_store_si128 is the aligned store, so out has to be
+   16-byte aligned. */
+static void
+extract_8mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
+  __m128i straddle;
+
+  straddle = _mm_or_si128(_mm_srli_epi32(current,18), _mm_slli_epi32(next,14));
+
+  _mm_store_si128(&out[0], _mm_and_si128(_mm_srli_epi32(straddle,12), mask8));
+  _mm_store_si128(&out[1], _mm_and_si128(_mm_srli_epi32(straddle,10), mask8));
+  _mm_store_si128(&out[2], _mm_and_si128(_mm_srli_epi32(straddle,8), mask8));
+  _mm_store_si128(&out[3], _mm_and_si128(_mm_srli_epi32(straddle,6), mask8));
+  _mm_store_si128(&out[4], _mm_and_si128(_mm_srli_epi32(straddle,4), mask8));
+  _mm_store_si128(&out[5], _mm_and_si128(_mm_srli_epi32(straddle,2), mask8));
+  _mm_store_si128(&out[6], _mm_and_si128(straddle, mask8));
+
+  /* No mask necessary for the 16-bit shift */
+  _mm_store_si128(&out[7], _mm_srli_epi32(current,16));
+  _mm_store_si128(&out[8], _mm_and_si128(_mm_srli_epi32(current,14), mask8));
+  _mm_store_si128(&out[9], _mm_and_si128(_mm_srli_epi32(current,12), mask8));
+  _mm_store_si128(&out[10], _mm_and_si128(_mm_srli_epi32(current,10), mask8));
+  _mm_store_si128(&out[11], _mm_and_si128(_mm_srli_epi32(current,8), mask8));
+  _mm_store_si128(&out[12], _mm_and_si128(_mm_srli_epi32(current,6), mask8));
+  _mm_store_si128(&out[13], _mm_and_si128(_mm_srli_epi32(current,4), mask8));
+  _mm_store_si128(&out[14], _mm_and_si128(_mm_srli_epi32(current,2), mask8));
+  _mm_store_si128(&out[15], _mm_and_si128(current, mask8));
+
+  return;
+}
+
+/* Increments counts[] once for each of the 64 8-mer codes derived from
+   current and next.  The codes are computed four at a time: first from
+   the word built by OR-ing (current >> 18) with (next << 14), shifted
+   right by 12, 10, ..., 0 bits, then from current shifted right by
+   16, 14, ..., 0 bits.  Every vector except the one shifted by 16 is
+   ANDed with mask8.  The trailing number on each line is the position
+   annotation carried over from the original code. */
+static void
+count_8mers_rev_simd (Count_T *counts, __m128i current, __m128i next) {
+  __m128i oligo;
+  Genomecomp_T array[4];
+
+  /* array[] is a plain local, so nothing in this function guarantees the
+     16-byte alignment that _mm_store_si128 requires.  The unaligned
+     store writes the same four values without that requirement. */
+
+  oligo = _mm_or_si128( _mm_srli_epi32(current,18), _mm_slli_epi32(next,14));
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
+  counts[array[0]] += 1;	/* 63 */
+  counts[array[1]] += 1;	/* 47 */
+  counts[array[2]] += 1;	/* 31 */
+  counts[array[3]] += 1;	/* 15 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
+  counts[array[0]] += 1;	/* 62 */
+  counts[array[1]] += 1;	/* 46 */
+  counts[array[2]] += 1;	/* 30 */
+  counts[array[3]] += 1;	/* 14 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
+  counts[array[0]] += 1;	/* 61 */
+  counts[array[1]] += 1;	/* 45 */
+  counts[array[2]] += 1;	/* 29 */
+  counts[array[3]] += 1;	/* 13 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
+  counts[array[0]] += 1;	/* 60 */
+  counts[array[1]] += 1;	/* 44 */
+  counts[array[2]] += 1;	/* 28 */
+  counts[array[3]] += 1;	/* 12 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
+  counts[array[0]] += 1;	/* 59 */
+  counts[array[1]] += 1;	/* 43 */
+  counts[array[2]] += 1;	/* 27 */
+  counts[array[3]] += 1;	/* 11 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
+  counts[array[0]] += 1;	/* 58 */
+  counts[array[1]] += 1;	/* 42 */
+  counts[array[2]] += 1;	/* 26 */
+  counts[array[3]] += 1;	/* 10 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( oligo, mask8));
+  counts[array[0]] += 1;	/* 57 */
+  counts[array[1]] += 1;	/* 41 */
+  counts[array[2]] += 1;	/* 25 */
+  counts[array[3]] += 1;	/* 9 */
+
+
+  /* No mask necessary for the 16-bit shift */
+  _mm_storeu_si128((__m128i *) array, _mm_srli_epi32(current,16));
+  counts[array[0]] += 1;	/* 56 */
+  counts[array[1]] += 1;	/* 40 */
+  counts[array[2]] += 1;	/* 24 */
+  counts[array[3]] += 1;	/* 8 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
+  counts[array[0]] += 1;	/* 55 */
+  counts[array[1]] += 1;	/* 39 */
+  counts[array[2]] += 1;	/* 23 */
+  counts[array[3]] += 1;	/* 7 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
+  counts[array[0]] += 1;	/* 54 */
+  counts[array[1]] += 1;	/* 38 */
+  counts[array[2]] += 1;	/* 22 */
+  counts[array[3]] += 1;	/* 6 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
+  counts[array[0]] += 1;	/* 53 */
+  counts[array[1]] += 1;	/* 37 */
+  counts[array[2]] += 1;	/* 21 */
+  counts[array[3]] += 1;	/* 5 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
+  counts[array[0]] += 1;	/* 52 */
+  counts[array[1]] += 1;	/* 36 */
+  counts[array[2]] += 1;	/* 20 */
+  counts[array[3]] += 1;	/* 4 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
+  counts[array[0]] += 1;	/* 51 */
+  counts[array[1]] += 1;	/* 35 */
+  counts[array[2]] += 1;	/* 19 */
+  counts[array[3]] += 1;	/* 3 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
+  counts[array[0]] += 1;	/* 50 */
+  counts[array[1]] += 1;	/* 34 */
+  counts[array[2]] += 1;	/* 18 */
+  counts[array[3]] += 1;	/* 2 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
+  counts[array[0]] += 1;	/* 49 */
+  counts[array[1]] += 1;	/* 33 */
+  counts[array[2]] += 1;	/* 17 */
+  counts[array[3]] += 1;	/* 1 */
+
+  _mm_storeu_si128((__m128i *) array, _mm_and_si128( current, mask8));
+  counts[array[0]] += 1;	/* 48 */
+  counts[array[1]] += 1;	/* 32 */
+  counts[array[2]] += 1;	/* 16 */
+  counts[array[3]] += 1;	/* 0 */
+
+  return;
+}
+
+#endif
+
+
+
+static int
+store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+ Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rc & MASK8; /* 0 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+
+ masked = (low_rc >> 2) & MASK8; /* 1 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 1;
+ }
+ }
+
+ masked = (low_rc >> 4) & MASK8; /* 2 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 2;
+ }
+ }
+
+ masked = (low_rc >> 6) & MASK8; /* 3 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 3;
+ }
+ }
+
+ masked = (low_rc >> 8) & MASK8; /* 4 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 4;
+ }
+ }
+
+ masked = (low_rc >> 10) & MASK8; /* 5 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 5;
+ }
+ }
+
+ masked = (low_rc >> 12) & MASK8; /* 6 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 6;
+ }
+ }
+
+ masked = (low_rc >> 14) & MASK8; /* 7 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 7;
+ }
+ }
+
+ masked = low_rc >> 16; /* 8, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 8;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == (low_rc & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((low_rc >> 2) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 1;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((low_rc >> 4) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 2;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((low_rc >> 6) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 3;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == ((low_rc >> 8) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 4;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((low_rc >> 10) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 5;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((low_rc >> 12) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 6;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((low_rc >> 14) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 7;
+ }
+ }
+
+
+ masked = low_rc >> 16; /* 8, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 8;
+ }
+ }
+#endif
+
+
+ oligo = low_rc >> 18; /* For 15..9 */
+ oligo |= high_rc << 14;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK8; /* 9 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 9;
+ }
+ }
+
+ masked = (oligo >> 2) & MASK8; /* 10 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 10;
+ }
+ }
+
+ masked = (oligo >> 4) & MASK8; /* 11 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 11;
+ }
+ }
+
+ masked = (oligo >> 6) & MASK8; /* 12 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 12;
+ }
+ }
+
+ masked = (oligo >> 8) & MASK8; /* 13 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 13;
+ }
+ }
+
+ masked = (oligo >> 10) & MASK8; /* 14 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 14;
+ }
+ }
+
+ masked = (oligo >> 12) & MASK8; /* 15 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 15;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == (oligo & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 9;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((oligo >> 2) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 10;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((oligo >> 4) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 11;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((oligo >> 6) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 12;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == ((oligo >> 8) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 13;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((oligo >> 10) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 14;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((oligo >> 12) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 15;
+ }
+ }
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rc & MASK8; /* 16 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 16;
+ }
+ }
+
+ masked = (high_rc >> 2) & MASK8; /* 17 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 17;
+ }
+ }
+
+ masked = (high_rc >> 4) & MASK8; /* 18 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
+
+ masked = (high_rc >> 6) & MASK8; /* 19 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 19;
+ }
+ }
+
+ masked = (high_rc >> 8) & MASK8; /* 20 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 20;
+ }
+ }
+
+ masked = (high_rc >> 10) & MASK8; /* 21 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 21;
+ }
+ }
+
+ masked = (high_rc >> 12) & MASK8; /* 22 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 22;
+ }
+ }
+
+ masked = (high_rc >> 14) & MASK8; /* 23 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 23;
+ }
+ }
+
+ masked = high_rc >> 16; /* 24, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 24;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == (high_rc & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 16;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((high_rc >> 2) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 17;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((high_rc >> 4) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((high_rc >> 6) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 19;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == ((high_rc >> 8) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 20;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((high_rc >> 10) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 21;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((high_rc >> 12) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 22;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((high_rc >> 14) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 23;
+ }
+ }
+
+
+ masked = high_rc >> 16; /* 24, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 24;
+ }
+ }
+#endif
+
+
+ oligo = high_rc >> 18; /* For 31..25 */
+ oligo |= nextlow_rc << 14;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK8; /* 25 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 25;
+ }
+ }
+
+ masked = (oligo >> 2) & MASK8; /* 26 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 26;
+ }
+ }
+
+ masked = (oligo >> 4) & MASK8; /* 27 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 27;
+ }
+ }
+
+ masked = (oligo >> 6) & MASK8; /* 28 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 28;
+ }
+ }
+
+ masked = (oligo >> 8) & MASK8; /* 29 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 29;
+ }
+ }
+
+ masked = (oligo >> 10) & MASK8; /* 30 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 30;
+ }
+ }
+
+ masked = (oligo >> 12) & MASK8; /* 31 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 31;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == (oligo & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 25;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((oligo >> 2) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 26;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((oligo >> 4) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 27;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ assert(masked == ((oligo >> 6) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 28;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask8);
+
+ masked = _mm_extract_epi32(_masked,0);
+ assert(masked == ((oligo >> 8) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 29;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ assert(masked == ((oligo >> 10) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 30;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ assert(masked == ((oligo >> 12) & MASK8));
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 31;
+ }
+ }
+#endif
+
+ return chrpos - 32;
+}
+
+
+
+static void
+count_7mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rc & MASK7; /* 0 */
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 2) & MASK7; /* 1 */
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 4) & MASK7; /* 2 */
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 6) & MASK7; /* 3 */
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 8) & MASK7; /* 4 */
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 10) & MASK7; /* 5 */
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 12) & MASK7; /* 6 */
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 14) & MASK7; /* 7 */
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 16) & MASK7; /* 8 */
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+ masked = low_rc >> 18; /* 9, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+ oligo = low_rc >> 20; /* For 15..10 */
+ oligo |= high_rc << 12;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK7; /* 10 */
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 2) & MASK7; /* 11 */
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 4) & MASK7; /* 12 */
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 6) & MASK7; /* 13 */
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 8) & MASK7; /* 14 */
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 10) & MASK7; /* 15 */
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rc & MASK7; /* 16 */
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 2) & MASK7; /* 17 */
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 4) & MASK7; /* 18 */
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 6) & MASK7; /* 19 */
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 8) & MASK7; /* 20 */
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 10) & MASK7; /* 21 */
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 12) & MASK7; /* 22 */
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 14) & MASK7; /* 23 */
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
+
+ masked = (high_rc >> 16) & MASK7; /* 24 */
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+
+ masked = high_rc >> 18; /* 25, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+ oligo = high_rc >> 20; /* For 31..26 */
+ oligo |= nextlow_rc << 12;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK7; /* 26 */
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 2) & MASK7; /* 27 */
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 4) & MASK7; /* 28 */
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 6) & MASK7; /* 29 */
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 8) & MASK7; /* 30 */
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+ masked = (oligo >> 10) & MASK7; /* 31 */
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+#endif
+
+ return;
+}
+
+
+/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
+ and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
+#ifdef USE_SIMD_FOR_COUNTS
+static void
+extract_7mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
+ __m128i oligo;
+ /* Writes 16 vectors to out, advancing out after each store; _mm_store_si128 needs out to be 16-byte aligned */
+ oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12)); /* per lane: top 12 bits of current below the low 20 bits of next */
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7)); /* first 6 vectors: straddling oligo, shifts 10 down to 0 */
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
+ _mm_store_si128(out++, _mm_and_si128( oligo, mask7));
+ /* Remaining 10 vectors come from current alone, shifts 18 down to 0 in steps of 2 */
+ _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary: the 32-bit logical shift leaves only 14 bits (presumably the width of mask7 -- confirm) */
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
+ _mm_store_si128(out++, _mm_and_si128( current, mask7));
+
+ return;
+}
+
+static void
+count_7mers_rev_simd (Count_T *counts, __m128i current, __m128i next) {
+ __m128i oligo;
+ Genomecomp_T array[4]; /* NOTE(review): target of aligned _mm_store_si128 but has no explicit 16-byte alignment -- confirm the ABI/compiler guarantees it */
+ /* Increments counts[] 64 times: 16 shifted-and-masked vectors x 4 lanes; trailing numbers are the positions given by the original author */
+ oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12)); /* per lane: top 12 bits of current below the low 20 bits of next */
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
+ counts[array[0]] += 1; /* 63 */
+ counts[array[1]] += 1; /* 47 */
+ counts[array[2]] += 1; /* 31 */
+ counts[array[3]] += 1; /* 15 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
+ counts[array[0]] += 1; /* 62 */
+ counts[array[1]] += 1; /* 46 */
+ counts[array[2]] += 1; /* 30 */
+ counts[array[3]] += 1; /* 14 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
+ counts[array[0]] += 1; /* 61 */
+ counts[array[1]] += 1; /* 45 */
+ counts[array[2]] += 1; /* 29 */
+ counts[array[3]] += 1; /* 13 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
+ counts[array[0]] += 1; /* 60 */
+ counts[array[1]] += 1; /* 44 */
+ counts[array[2]] += 1; /* 28 */
+ counts[array[3]] += 1; /* 12 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
+ counts[array[0]] += 1; /* 59 */
+ counts[array[1]] += 1; /* 43 */
+ counts[array[2]] += 1; /* 27 */
+ counts[array[3]] += 1; /* 11 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask7));
+ counts[array[0]] += 1; /* 58 */
+ counts[array[1]] += 1; /* 42 */
+ counts[array[2]] += 1; /* 26 */
+ counts[array[3]] += 1; /* 10 */
+
+ /* The remaining ten groups are taken from current alone, shifts 18 down to 0 in steps of 2 */
+ _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,18)); /* No mask necessary */
+ counts[array[0]] += 1; /* 57 */
+ counts[array[1]] += 1; /* 41 */
+ counts[array[2]] += 1; /* 25 */
+ counts[array[3]] += 1; /* 9 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
+ counts[array[0]] += 1; /* 56 */
+ counts[array[1]] += 1; /* 40 */
+ counts[array[2]] += 1; /* 24 */
+ counts[array[3]] += 1; /* 8 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
+ counts[array[0]] += 1; /* 55 */
+ counts[array[1]] += 1; /* 39 */
+ counts[array[2]] += 1; /* 23 */
+ counts[array[3]] += 1; /* 7 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
+ counts[array[0]] += 1; /* 54 */
+ counts[array[1]] += 1; /* 38 */
+ counts[array[2]] += 1; /* 22 */
+ counts[array[3]] += 1; /* 6 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
+ counts[array[0]] += 1; /* 53 */
+ counts[array[1]] += 1; /* 37 */
+ counts[array[2]] += 1; /* 21 */
+ counts[array[3]] += 1; /* 5 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
+ counts[array[0]] += 1; /* 52 */
+ counts[array[1]] += 1; /* 36 */
+ counts[array[2]] += 1; /* 20 */
+ counts[array[3]] += 1; /* 4 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
+ counts[array[0]] += 1; /* 51 */
+ counts[array[1]] += 1; /* 35 */
+ counts[array[2]] += 1; /* 19 */
+ counts[array[3]] += 1; /* 3 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
+ counts[array[0]] += 1; /* 50 */
+ counts[array[1]] += 1; /* 34 */
+ counts[array[2]] += 1; /* 18 */
+ counts[array[3]] += 1; /* 2 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
+ counts[array[0]] += 1; /* 49 */
+ counts[array[1]] += 1; /* 33 */
+ counts[array[2]] += 1; /* 17 */
+ counts[array[3]] += 1; /* 1 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask7));
+ counts[array[0]] += 1; /* 48 */
+ counts[array[1]] += 1; /* 32 */
+ counts[array[2]] += 1; /* 16 */
+ counts[array[3]] += 1; /* 0 */
+
+ return;
+}
+#endif
+
+
+static Chrpos_T
+store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+ Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rc & MASK7; /* 0 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+
+ masked = (low_rc >> 2) & MASK7; /* 1 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 1;
+ }
+ }
+
+ masked = (low_rc >> 4) & MASK7; /* 2 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 2;
+ }
+ }
+
+ masked = (low_rc >> 6) & MASK7; /* 3 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 3;
+ }
+ }
+
+ masked = (low_rc >> 8) & MASK7; /* 4 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 4;
+ }
+ }
+
+ masked = (low_rc >> 10) & MASK7; /* 5 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 5;
+ }
+ }
+
+ masked = (low_rc >> 12) & MASK7; /* 6 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 6;
+ }
+ }
+
+ masked = (low_rc >> 14) & MASK7; /* 7 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 7;
+ }
+ }
+
+ masked = (low_rc >> 16) & MASK7; /* 8 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 8;
+ }
+ }
+
+ masked = low_rc >> 18; /* 9, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 9;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 1;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 2;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 3;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 4;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 5;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 6;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 7;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 8;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 9;
+ }
+ }
+#endif
+
+
+ oligo = low_rc >> 20; /* For 15..10 */
+ oligo |= high_rc << 12;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK7; /* 10 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 10;
+ }
+ }
+
+ masked = (oligo >> 2) & MASK7; /* 11 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 11;
+ }
+ }
+
+ masked = (oligo >> 4) & MASK7; /* 12 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 12;
+ }
+ }
+
+ masked = (oligo >> 6) & MASK7; /* 13 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 13;
+ }
+ }
+
+ masked = (oligo >> 8) & MASK7; /* 14 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 14;
+ }
+ }
+
+ masked = (oligo >> 10) & MASK7; /* 15 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 15;
+ }
+ }
+
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 10;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 11;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 12;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 13;
+ }
+ }
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 14;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 15;
+ }
+ }
+
#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
- }
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
- chrpos = store_5mers_fwd(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
- ptr += 3;
- }
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rc & MASK7; /* 16 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 16;
+ }
+ }
+ masked = (high_rc >> 2) & MASK7; /* 17 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
} else {
- abort();
+ *(--pointers[masked]) = chrpos - 17;
}
+ }
+ masked = (high_rc >> 4) & MASK7; /* 18 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
-#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- /* low = Bigendian_convert_uint(ref_blocks[ptr+1]); */
- nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
- high = ref_blocks[ptr];
- /* low = ref_blocks[ptr+1]; */
- nextlow = ref_blocks[ptr+4];
-#endif
- if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else if (mode == CMET_NONSTRANDED) {
- if (genestrand > 0) {
- high = Cmet_reduce_ct(high); /* low = Cmet_reduce_ct(low); */ nextlow = Cmet_reduce_ct(nextlow);
- } else {
- high = Cmet_reduce_ga(high); /* low = Cmet_reduce_ga(low); */ nextlow = Cmet_reduce_ga(nextlow);
- }
+ masked = (high_rc >> 6) & MASK7; /* 19 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 19;
}
+ }
- high_rev = nexthigh_rev; /* depended on low */
- low_rev = reverse_nt[high >> 16];
- low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
- nexthigh_rev = reverse_nt[nextlow >> 16];
- nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+ masked = (high_rc >> 8) & MASK7; /* 20 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 20;
+ }
+ }
- if (indexsize == 8) {
- chrpos = store_8mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 7) {
- chrpos = store_7mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 6) {
- chrpos = store_6mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
- } else if (indexsize == 5) {
- chrpos = store_5mers_fwd_partial(chrpos,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+ masked = (high_rc >> 10) & MASK7; /* 21 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
} else {
- abort();
+ *(--pointers[masked]) = chrpos - 21;
}
+ }
+ masked = (high_rc >> 12) & MASK7; /* 22 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 22;
+ }
}
-
- return;
-}
-#endif
+ masked = (high_rc >> 14) & MASK7; /* 23 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 23;
+ }
+ }
-/************************************************************************
- * REV
- ************************************************************************/
+ masked = (high_rc >> 16) & MASK7; /* 24 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 24;
+ }
+ }
-static void
-count_8mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
- int startdiscard, int enddiscard) {
- Genomecomp_T masked;
- int pos;
+ masked = high_rc >> 18; /* 25, No mask necessary */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 25;
+ }
+ }
- pos = enddiscard;
+#else
+ _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
- while (pos >= startdiscard && pos >= 25) {
- masked = high_rc >> (2*pos - 32);
- masked |= nextlow_rc << (64 - 2*pos);
- masked &= MASK8;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 16;
+ }
}
- while (pos >= startdiscard && pos >= 16) {
- masked = high_rc >> (2*pos - 32);
- masked &= MASK8;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 17;
+ }
}
- while (pos >= startdiscard && pos >= 9) {
- masked = low_rc >> 2*pos;
- masked |= high_rc << (32 - 2*pos);
- masked &= MASK8;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 18;
+ }
}
- while (pos >= startdiscard) {
- masked = low_rc >> 2*pos;
- masked &= MASK8;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 19;
+ }
}
- return;
-}
-
-static int
-store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
- Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
- int startdiscard, int enddiscard) {
- Genomecomp_T masked;
- int pos;
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
- pos = enddiscard;
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 20;
+ }
+ }
- while (pos >= startdiscard && pos >= 25) {
- masked = high_rc >> (2*pos - 32);
- masked |= nextlow_rc << (64 - 2*pos);
- masked &= MASK8;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 21;
}
- chrpos++;
- pos--;
}
- while (pos >= startdiscard && pos >= 16) {
- masked = high_rc >> (2*pos - 32);
- masked &= MASK8;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 22;
}
- chrpos++;
- pos--;
}
- while (pos >= startdiscard && pos >= 9) {
- masked = low_rc >> 2*pos;
- masked |= high_rc << (32 - 2*pos);
- masked &= MASK8;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 23;
}
- chrpos++;
- pos--;
}
- while (pos >= startdiscard) {
- masked = low_rc >> 2*pos;
- masked &= MASK8;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 24;
}
- chrpos++;
- pos--;
}
- return chrpos;
-}
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 25;
+ }
+ }
+#endif
-static void
-count_7mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
- int startdiscard, int enddiscard) {
- Genomecomp_T masked;
- int pos;
+ oligo = high_rc >> 20; /* For 31..26 */
+ oligo |= nextlow_rc << 12;
- pos = enddiscard;
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK7; /* 26 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 26;
+ }
+ }
- while (pos >= startdiscard && pos >= 26) {
- masked = high_rc >> (2*pos - 32);
- masked |= nextlow_rc << (64 - 2*pos);
- masked &= MASK7;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
+ masked = (oligo >> 2) & MASK7; /* 27 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 27;
+ }
}
- while (pos >= startdiscard && pos >= 16) {
- masked = high_rc >> (2*pos - 32);
- masked &= MASK7;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
+ masked = (oligo >> 4) & MASK7; /* 28 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 28;
+ }
}
- while (pos >= startdiscard && pos >= 10) {
- masked = low_rc >> 2*pos;
- masked |= high_rc << (32 - 2*pos);
- masked &= MASK7;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
+ masked = (oligo >> 6) & MASK7; /* 29 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 29;
+ }
}
- while (pos >= startdiscard) {
- masked = low_rc >> 2*pos;
- masked &= MASK7;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
+ masked = (oligo >> 8) & MASK7; /* 30 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 30;
+ }
}
- return;
-}
+ masked = (oligo >> 10) & MASK7; /* 31 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 31;
+ }
+ }
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask7);
-static int
-store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
- Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
- int startdiscard, int enddiscard) {
- Genomecomp_T masked;
- int pos;
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 26;
+ }
+ }
- pos = enddiscard;
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 27;
+ }
+ }
- while (pos >= startdiscard && pos >= 26) {
- masked = high_rc >> (2*pos - 32);
- masked |= nextlow_rc << (64 - 2*pos);
- masked &= MASK7;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 28;
}
- chrpos++;
- pos--;
}
- while (pos >= startdiscard && pos >= 16) {
- masked = high_rc >> (2*pos - 32);
- masked &= MASK7;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 29;
}
- chrpos++;
- pos--;
}
- while (pos >= startdiscard && pos >= 10) {
- masked = low_rc >> 2*pos;
- masked |= high_rc << (32 - 2*pos);
- masked &= MASK7;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask7);
+
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 30;
}
- chrpos++;
- pos--;
}
- while (pos >= startdiscard) {
- masked = low_rc >> 2*pos;
- masked &= MASK7;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 31;
}
- chrpos++;
- pos--;
}
+#endif
- return chrpos;
+ return chrpos - 32;
}
static void
-count_6mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
- int startdiscard, int enddiscard) {
- Genomecomp_T masked;
- int pos;
+count_6mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
- pos = enddiscard;
- while (pos >= startdiscard && pos >= 27) {
- masked = high_rc >> (2*pos - 32);
- masked |= nextlow_rc << (64 - 2*pos);
- masked &= MASK6;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
- }
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rc & MASK6; /* 0 */
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard && pos >= 16) {
- masked = high_rc >> (2*pos - 32);
- masked &= MASK6;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
- }
+ masked = (low_rc >> 2) & MASK6; /* 1 */
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard && pos >= 11) {
- masked = low_rc >> 2*pos;
- masked |= high_rc << (32 - 2*pos);
- masked &= MASK6;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
- }
+ masked = (low_rc >> 4) & MASK6; /* 2 */
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard) {
- masked = low_rc >> 2*pos;
- masked &= MASK6;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
- }
+ masked = (low_rc >> 6) & MASK6; /* 3 */
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
- return;
-}
+ masked = (low_rc >> 8) & MASK6; /* 4 */
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+ masked = (low_rc >> 10) & MASK6; /* 5 */
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
-static int
-store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
- Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
- int startdiscard, int enddiscard) {
- Genomecomp_T masked;
- int pos;
+ masked = (low_rc >> 12) & MASK6; /* 6 */
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
- pos = enddiscard;
+ masked = (low_rc >> 14) & MASK6; /* 7 */
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard && pos >= 27) {
- masked = high_rc >> (2*pos - 32);
- masked |= nextlow_rc << (64 - 2*pos);
- masked &= MASK6;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
- chrpos++;
- pos--;
- }
+ masked = (low_rc >> 16) & MASK6; /* 8 */
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard && pos >= 16) {
- masked = high_rc >> (2*pos - 32);
- masked &= MASK6;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
- chrpos++;
- pos--;
- }
+ masked = (low_rc >> 18) & MASK6; /* 9 */
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard && pos >= 11) {
- masked = low_rc >> 2*pos;
- masked |= high_rc << (32 - 2*pos);
- masked &= MASK6;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
- chrpos++;
- pos--;
- }
+ masked = low_rc >> 20; /* 10, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard) {
- masked = low_rc >> 2*pos;
- masked &= MASK6;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
- chrpos++;
- pos--;
- }
+#else
+ _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask6);
- return chrpos;
-}
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
-static void
-count_5mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
- int startdiscard, int enddiscard) {
- Genomecomp_T masked;
- int pos;
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
- pos = enddiscard;
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard && pos >= 28) {
- masked = high_rc >> (2*pos - 32);
- masked |= nextlow_rc << (64 - 2*pos);
- masked &= MASK5;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
- }
- while (pos >= startdiscard && pos >= 16) {
- masked = high_rc >> (2*pos - 32);
- masked &= MASK5;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask6);
- while (pos >= startdiscard && pos >= 12) {
- masked = low_rc >> 2*pos;
- masked |= high_rc << (32 - 2*pos);
- masked &= MASK5;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard) {
- masked = low_rc >> 2*pos;
- masked &= MASK5;
- counts[masked] += 1;
- debug(printf("%d %04X => %d\n",pos,masked,counts[masked]));
- pos--;
- }
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
- return;
-}
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
-static int
-store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
- Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
- int startdiscard, int enddiscard) {
- Genomecomp_T masked;
- int pos;
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask6);
- pos = enddiscard;
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard && pos >= 28) {
- masked = high_rc >> (2*pos - 32);
- masked |= nextlow_rc << (64 - 2*pos);
- masked &= MASK5;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
- chrpos++;
- pos--;
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
- while (pos >= startdiscard && pos >= 16) {
- masked = high_rc >> (2*pos - 32);
- masked &= MASK5;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
- chrpos++;
- pos--;
- }
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+#endif
- while (pos >= startdiscard && pos >= 12) {
- masked = low_rc >> 2*pos;
- masked |= high_rc << (32 - 2*pos);
- masked &= MASK5;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
- chrpos++;
- pos--;
- }
- while (pos >= startdiscard) {
- masked = low_rc >> 2*pos;
- masked &= MASK5;
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
- chrpos++;
- pos--;
- }
+ oligo = low_rc >> 22; /* For 15..11 */
+ oligo |= high_rc << 10;
- return chrpos;
-}
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK6; /* 11 */
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+ masked = (oligo >> 2) & MASK6; /* 12 */
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
-static void
-count_8mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
- Genomecomp_T masked, oligo;
+ masked = (oligo >> 4) & MASK6; /* 13 */
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
- oligo = high_rc >> 18; /* For 31..25 */
- oligo |= nextlow_rc << 14;
+ masked = (oligo >> 6) & MASK6; /* 14 */
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 12) & MASK8; /* 31 */
+ masked = (oligo >> 8) & MASK6; /* 15 */
counts[masked] += 1;
- debug(printf("31 %04X => %d\n",masked,counts[masked]));
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 10) & MASK8; /* 30 */
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask6);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("30 %04X => %d\n",masked,counts[masked]));
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 8) & MASK8; /* 29 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("29 %04X => %d\n",masked,counts[masked]));
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 6) & MASK8; /* 28 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("28 %04X => %d\n",masked,counts[masked]));
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 4) & MASK8; /* 27 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("27 %04X => %d\n",masked,counts[masked]));
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 2) & MASK8; /* 26 */
+
+ masked = (oligo >> 8) & MASK6; /* 15 */
counts[masked] += 1;
- debug(printf("26 %04X => %d\n",masked,counts[masked]));
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+#endif
- masked = oligo & MASK8; /* 25 */
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rc & MASK6; /* 16 */
counts[masked] += 1;
- debug(printf("25 %04X => %d\n",masked,counts[masked]));
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+ masked = (high_rc >> 2) & MASK6; /* 17 */
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = high_rc >> 16; /* 24, No mask necessary */
+ masked = (high_rc >> 4) & MASK6; /* 18 */
counts[masked] += 1;
- debug(printf("24 %04X => %d\n",masked,counts[masked]));
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 14) & MASK8; /* 23 */
+ masked = (high_rc >> 6) & MASK6; /* 19 */
counts[masked] += 1;
- debug(printf("23 %04X => %d\n",masked,counts[masked]));
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 12) & MASK8; /* 22 */
+ masked = (high_rc >> 8) & MASK6; /* 20 */
counts[masked] += 1;
- debug(printf("22 %04X => %d\n",masked,counts[masked]));
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 10) & MASK8; /* 21 */
+ masked = (high_rc >> 10) & MASK6; /* 21 */
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 8) & MASK8; /* 20 */
+ masked = (high_rc >> 12) & MASK6; /* 22 */
counts[masked] += 1;
- debug(printf("20 %04X => %d\n",masked,counts[masked]));
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 6) & MASK8; /* 19 */
+ masked = (high_rc >> 14) & MASK6; /* 23 */
counts[masked] += 1;
- debug(printf("19 %04X => %d\n",masked,counts[masked]));
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 4) & MASK8; /* 18 */
+ masked = (high_rc >> 16) & MASK6; /* 24 */
counts[masked] += 1;
- debug(printf("18 %04X => %d\n",masked,counts[masked]));
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 2) & MASK8; /* 17 */
+ masked = (high_rc >> 18) & MASK6; /* 25 */
counts[masked] += 1;
- debug(printf("17 %04X => %d\n",masked,counts[masked]));
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = high_rc & MASK8; /* 16 */
+ masked = high_rc >> 20; /* 26, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask6);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
- oligo = low_rc >> 18; /* For 15..9 */
- oligo |= high_rc << 14;
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 12) & MASK8; /* 15 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask6);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("15 %04X => %d\n",masked,counts[masked]));
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 10) & MASK8; /* 14 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("14 %04X => %d\n",masked,counts[masked]));
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 8) & MASK8; /* 13 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("13 %04X => %d\n",masked,counts[masked]));
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 6) & MASK8; /* 12 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("12 %04X => %d\n",masked,counts[masked]));
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 4) & MASK8; /* 11 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask6);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("11 %04X => %d\n",masked,counts[masked]));
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 2) & MASK8; /* 10 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("10 %04X => %d\n",masked,counts[masked]));
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = oligo & MASK8; /* 9 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("9 %04X => %d\n",masked,counts[masked]));
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
+#endif
- masked = low_rc >> 16; /* 8, No mask necessary */
+ oligo = high_rc >> 22; /* For 31..27 */
+ oligo |= nextlow_rc << 10;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK6; /* 27 */
counts[masked] += 1;
- debug(printf("8 %04X => %d\n",masked,counts[masked]));
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 14) & MASK8; /* 7 */
+ masked = (oligo >> 2) & MASK6; /* 28 */
counts[masked] += 1;
- debug(printf("7 %04X => %d\n",masked,counts[masked]));
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 12) & MASK8; /* 6 */
+ masked = (oligo >> 4) & MASK6; /* 29 */
counts[masked] += 1;
- debug(printf("6 %04X => %d\n",masked,counts[masked]));
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 10) & MASK8; /* 5 */
+ masked = (oligo >> 6) & MASK6; /* 30 */
counts[masked] += 1;
- debug(printf("5 %04X => %d\n",masked,counts[masked]));
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 8) & MASK8; /* 4 */
+ masked = (oligo >> 8) & MASK6; /* 31 */
counts[masked] += 1;
- debug(printf("4 %04X => %d\n",masked,counts[masked]));
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 6) & MASK8; /* 3 */
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask6);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("3 %04X => %d\n",masked,counts[masked]));
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 4) & MASK8; /* 2 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("2 %04X => %d\n",masked,counts[masked]));
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 2) & MASK8; /* 1 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("1 %04X => %d\n",masked,counts[masked]));
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = low_rc & MASK8; /* 0 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("0 %04X => %d\n",masked,counts[masked]));
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+
+ masked = (oligo >> 8) & MASK6; /* 31 */
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+#endif
+
+ return;
+}
+/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
+ and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
+#ifdef USE_SIMD_FOR_COUNTS
+static void
+extract_6mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
+ __m128i oligo;
+
+ oligo = _mm_or_si128( _mm_srli_epi32(current,22), _mm_slli_epi32(next,10));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
+ _mm_store_si128(out++, _mm_and_si128( oligo, mask6));
+
+ _mm_store_si128(out++, _mm_srli_epi32(current,20));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask6));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask6));
+ _mm_store_si128(out++, _mm_and_si128( current, mask6));
return;
}
-
-
-/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
- and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
-#ifdef USE_SIMD_FOR_COUNTS
static void
-extract_8mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
+count_6mers_rev_simd (Count_T *counts, __m128i current, __m128i next) {
__m128i oligo;
+ Genomecomp_T array[4];
- oligo = _mm_or_si128( _mm_srli_epi32(current,18), _mm_slli_epi32(next,14));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
- _mm_store_si128(out++, _mm_and_si128( oligo, mask8));
-
- _mm_store_si128(out++, _mm_srli_epi32(current,16)); /* No mask necessary */;
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
- _mm_store_si128(out++, _mm_and_si128( current, mask8));
+ oligo = _mm_or_si128( _mm_srli_epi32(current,22), _mm_slli_epi32(next,10));
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6));
+ counts[array[0]] += 1; /* 63 */
+ counts[array[1]] += 1; /* 47 */
+ counts[array[2]] += 1; /* 31 */
+ counts[array[3]] += 1; /* 15 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6));
+ counts[array[0]] += 1; /* 62 */
+ counts[array[1]] += 1; /* 46 */
+ counts[array[2]] += 1; /* 30 */
+ counts[array[3]] += 1; /* 14 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6));
+ counts[array[0]] += 1; /* 61 */
+ counts[array[1]] += 1; /* 45 */
+ counts[array[2]] += 1; /* 29 */
+ counts[array[3]] += 1; /* 13 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
+ counts[array[0]] += 1; /* 60 */
+ counts[array[1]] += 1; /* 44 */
+ counts[array[2]] += 1; /* 28 */
+ counts[array[3]] += 1; /* 12 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask6));
+ counts[array[0]] += 1; /* 59 */
+ counts[array[1]] += 1; /* 43 */
+ counts[array[2]] += 1; /* 27 */
+ counts[array[3]] += 1; /* 11 */
+
+
+ _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,20));
+ counts[array[0]] += 1; /* 58 */
+ counts[array[1]] += 1; /* 42 */
+ counts[array[2]] += 1; /* 26 */
+ counts[array[3]] += 1; /* 10 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
+ counts[array[0]] += 1; /* 57 */
+ counts[array[1]] += 1; /* 41 */
+ counts[array[2]] += 1; /* 25 */
+ counts[array[3]] += 1; /* 9 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
+ counts[array[0]] += 1; /* 56 */
+ counts[array[1]] += 1; /* 40 */
+ counts[array[2]] += 1; /* 24 */
+ counts[array[3]] += 1; /* 8 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
+ counts[array[0]] += 1; /* 55 */
+ counts[array[1]] += 1; /* 39 */
+ counts[array[2]] += 1; /* 23 */
+ counts[array[3]] += 1; /* 7 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask6));
+ counts[array[0]] += 1; /* 54 */
+ counts[array[1]] += 1; /* 38 */
+ counts[array[2]] += 1; /* 22 */
+ counts[array[3]] += 1; /* 6 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask6));
+ counts[array[0]] += 1; /* 53 */
+ counts[array[1]] += 1; /* 37 */
+ counts[array[2]] += 1; /* 21 */
+ counts[array[3]] += 1; /* 5 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask6));
+ counts[array[0]] += 1; /* 52 */
+ counts[array[1]] += 1; /* 36 */
+ counts[array[2]] += 1; /* 20 */
+ counts[array[3]] += 1; /* 4 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask6));
+ counts[array[0]] += 1; /* 51 */
+ counts[array[1]] += 1; /* 35 */
+ counts[array[2]] += 1; /* 19 */
+ counts[array[3]] += 1; /* 3 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask6));
+ counts[array[0]] += 1; /* 50 */
+ counts[array[1]] += 1; /* 34 */
+ counts[array[2]] += 1; /* 18 */
+ counts[array[3]] += 1; /* 2 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask6));
+ counts[array[0]] += 1; /* 49 */
+ counts[array[1]] += 1; /* 33 */
+ counts[array[2]] += 1; /* 17 */
+ counts[array[3]] += 1; /* 1 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask6));
+ counts[array[0]] += 1; /* 48 */
+ counts[array[1]] += 1; /* 32 */
+ counts[array[2]] += 1; /* 16 */
+ counts[array[3]] += 1; /* 0 */
return;
}
-#endif
-
+#endif
static int
-store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
- oligo = high_rc >> 18; /* For 31..25 */
- oligo |= nextlow_rc << 14;
- masked = (oligo >> 12) & MASK8; /* 31 */
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rc & MASK6; /* 0 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos;
}
}
- masked = (oligo >> 10) & MASK8; /* 30 */
+ masked = (low_rc >> 2) & MASK6; /* 1 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 1;
+ *(--pointers[masked]) = chrpos - 1;
}
}
- masked = (oligo >> 8) & MASK8; /* 29 */
+ masked = (low_rc >> 4) & MASK6; /* 2 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 2;
+ *(--pointers[masked]) = chrpos - 2;
}
}
- masked = (oligo >> 6) & MASK8; /* 28 */
+ masked = (low_rc >> 6) & MASK6; /* 3 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 3;
+ *(--pointers[masked]) = chrpos - 3;
}
}
- masked = (oligo >> 4) & MASK8; /* 27 */
+ masked = (low_rc >> 8) & MASK6; /* 4 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 4;
+ *(--pointers[masked]) = chrpos - 4;
}
}
- masked = (oligo >> 2) & MASK8; /* 26 */
+ masked = (low_rc >> 10) & MASK6; /* 5 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 5;
+ *(--pointers[masked]) = chrpos - 5;
}
}
- masked = oligo & MASK8; /* 25 */
+ masked = (low_rc >> 12) & MASK6; /* 6 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 6;
+ *(--pointers[masked]) = chrpos - 6;
}
}
-
- masked = high_rc >> 16; /* 24, No mask necessary */
+ masked = (low_rc >> 14) & MASK6; /* 7 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 7;
+ *(--pointers[masked]) = chrpos - 7;
}
}
- masked = (high_rc >> 14) & MASK8; /* 23 */
+ masked = (low_rc >> 16) & MASK6; /* 8 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 8;
+ *(--pointers[masked]) = chrpos - 8;
}
}
- masked = (high_rc >> 12) & MASK8; /* 22 */
+ masked = (low_rc >> 18) & MASK6; /* 9 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 9;
+ *(--pointers[masked]) = chrpos - 9;
}
}
- masked = (high_rc >> 10) & MASK8; /* 21 */
+ masked = low_rc >> 20; /* 10, No mask necessary */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 10;
+ *(--pointers[masked]) = chrpos - 10;
}
}
- masked = (high_rc >> 8) & MASK8; /* 20 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 11;
- }
- }
+#else
+ _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask6);
- masked = (high_rc >> 6) & MASK8; /* 19 */
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 12;
+ *(--pointers[masked]) = chrpos;
}
}
- masked = (high_rc >> 4) & MASK8; /* 18 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 13;
+ *(--pointers[masked]) = chrpos - 1;
}
}
- masked = (high_rc >> 2) & MASK8; /* 17 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 14;
+ *(--pointers[masked]) = chrpos - 2;
}
}
- masked = high_rc & MASK8; /* 16 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 15;
+ *(--pointers[masked]) = chrpos - 3;
}
}
- oligo = low_rc >> 18; /* For 15..9 */
- oligo |= high_rc << 14;
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask6);
- masked = (oligo >> 12) & MASK8; /* 15 */
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 16;
+ *(--pointers[masked]) = chrpos - 4;
}
}
- masked = (oligo >> 10) & MASK8; /* 14 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 17;
+ *(--pointers[masked]) = chrpos - 5;
}
}
- masked = (oligo >> 8) & MASK8; /* 13 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 18;
+ *(--pointers[masked]) = chrpos - 6;
}
}
- masked = (oligo >> 6) & MASK8; /* 12 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 19;
+ *(--pointers[masked]) = chrpos - 7;
}
}
- masked = (oligo >> 4) & MASK8; /* 11 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 20;
- }
- }
- masked = (oligo >> 2) & MASK8; /* 10 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 21;
- }
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask6);
- masked = oligo & MASK8; /* 9 */
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 22;
+ *(--pointers[masked]) = chrpos - 8;
}
}
-
- masked = low_rc >> 16; /* 8, No mask necessary */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 23;
+ *(--pointers[masked]) = chrpos - 9;
}
}
- masked = (low_rc >> 14) & MASK8; /* 7 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 24;
+ *(--pointers[masked]) = chrpos - 10;
}
}
+#endif
- masked = (low_rc >> 12) & MASK8; /* 6 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 25;
- }
- }
- masked = (low_rc >> 10) & MASK8; /* 5 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 26;
- }
- }
+ oligo = low_rc >> 22; /* For 15..11 */
+ oligo |= high_rc << 10;
- masked = (low_rc >> 8) & MASK8; /* 4 */
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK6; /* 11 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 27;
+ *(--pointers[masked]) = chrpos - 11;
}
}
- masked = (low_rc >> 6) & MASK8; /* 3 */
+ masked = (oligo >> 2) & MASK6; /* 12 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 28;
+ *(--pointers[masked]) = chrpos - 12;
}
}
- masked = (low_rc >> 4) & MASK8; /* 2 */
+ masked = (oligo >> 4) & MASK6; /* 13 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 29;
+ *(--pointers[masked]) = chrpos - 13;
}
}
- masked = (low_rc >> 2) & MASK8; /* 1 */
+ masked = (oligo >> 6) & MASK6; /* 14 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 30;
+ *(--pointers[masked]) = chrpos - 14;
}
}
- masked = low_rc & MASK8; /* 0 */
+ masked = (oligo >> 8) & MASK6; /* 15 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 31;
+ *(--pointers[masked]) = chrpos - 15;
}
}
- return chrpos + 32;
-}
-
-
-
-static void
-count_7mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
- Genomecomp_T masked, oligo;
-
- oligo = high_rc >> 20; /* For 31..26 */
- oligo |= nextlow_rc << 12;
-
- masked = (oligo >> 10) & MASK7; /* 31 */
- counts[masked] += 1;
- debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 8) & MASK7; /* 30 */
- counts[masked] += 1;
- debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 6) & MASK7; /* 29 */
- counts[masked] += 1;
- debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 4) & MASK7; /* 28 */
- counts[masked] += 1;
- debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 2) & MASK7; /* 27 */
- counts[masked] += 1;
- debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
- masked = oligo & MASK7; /* 26 */
- counts[masked] += 1;
- debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-
- masked = high_rc >> 18; /* 25, No mask necessary */
- counts[masked] += 1;
- debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 16) & MASK7; /* 24 */
- counts[masked] += 1;
- debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 14) & MASK7; /* 23 */
- counts[masked] += 1;
- debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 12) & MASK7; /* 22 */
- counts[masked] += 1;
- debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 10) & MASK7; /* 21 */
- counts[masked] += 1;
- debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 8) & MASK7; /* 20 */
- counts[masked] += 1;
- debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 6) & MASK7; /* 19 */
- counts[masked] += 1;
- debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 4) & MASK7; /* 18 */
- counts[masked] += 1;
- debug(printf("18 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 2) & MASK7; /* 17 */
- counts[masked] += 1;
- debug(printf("17 %04X => %d\n",masked,counts[masked]));
-
- masked = high_rc & MASK7; /* 16 */
- counts[masked] += 1;
- debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
-
- oligo = low_rc >> 20; /* For 15..10 */
- oligo |= high_rc << 12;
-
- masked = (oligo >> 10) & MASK7; /* 15 */
- counts[masked] += 1;
- debug(printf("15 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 8) & MASK7; /* 14 */
- counts[masked] += 1;
- debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 6) & MASK7; /* 13 */
- counts[masked] += 1;
- debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 4) & MASK7; /* 12 */
- counts[masked] += 1;
- debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 2) & MASK7; /* 11 */
- counts[masked] += 1;
- debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
- masked = oligo & MASK7; /* 10 */
- counts[masked] += 1;
- debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
-
- masked = low_rc >> 18; /* 9, No mask necessary */
- counts[masked] += 1;
- debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 16) & MASK7; /* 8 */
- counts[masked] += 1;
- debug(printf("8 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 14) & MASK7; /* 7 */
- counts[masked] += 1;
- debug(printf("7 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 12) & MASK7; /* 6 */
- counts[masked] += 1;
- debug(printf("6 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 10) & MASK7; /* 5 */
- counts[masked] += 1;
- debug(printf("5 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 8) & MASK7; /* 4 */
- counts[masked] += 1;
- debug(printf("4 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 6) & MASK7; /* 3 */
- counts[masked] += 1;
- debug(printf("3 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 4) & MASK7; /* 2 */
- counts[masked] += 1;
- debug(printf("2 %04X => %d\n",masked,counts[masked]));
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask6);
- masked = (low_rc >> 2) & MASK7; /* 1 */
- counts[masked] += 1;
- debug(printf("1 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 11;
+ }
+ }
- masked = low_rc & MASK7; /* 0 */
- counts[masked] += 1;
- debug(printf("0 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 12;
+ }
+ }
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 13;
+ }
+ }
- return;
-}
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 14;
+ }
+ }
-/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
- and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_7mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
- __m128i oligo;
+ masked = (oligo >> 8) & MASK6; /* 15 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 15;
+ }
+ }
+#endif
- oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
- _mm_store_si128(out++, _mm_and_si128( oligo, mask7));
- _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary */
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
- _mm_store_si128(out++, _mm_and_si128( current, mask7));
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rc & MASK6; /* 16 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 16;
+ }
+ }
- return;
-}
-#endif
+ masked = (high_rc >> 2) & MASK6; /* 17 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 17;
+ }
+ }
+ masked = (high_rc >> 4) & MASK6; /* 18 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
-static Chrpos_T
-store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
- Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
- Genomecomp_T masked, oligo;
+ masked = (high_rc >> 6) & MASK6; /* 19 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 19;
+ }
+ }
- oligo = high_rc >> 20; /* For 31..26 */
- oligo |= nextlow_rc << 12;
+ masked = (high_rc >> 8) & MASK6; /* 20 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 20;
+ }
+ }
- masked = (oligo >> 10) & MASK7; /* 31 */
+ masked = (high_rc >> 10) & MASK6; /* 21 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos - 21;
}
}
- masked = (oligo >> 8) & MASK7; /* 30 */
+ masked = (high_rc >> 12) & MASK6; /* 22 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 1;
+ *(--pointers[masked]) = chrpos - 22;
}
}
- masked = (oligo >> 6) & MASK7; /* 29 */
+ masked = (high_rc >> 14) & MASK6; /* 23 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 2;
+ *(--pointers[masked]) = chrpos - 23;
}
}
- masked = (oligo >> 4) & MASK7; /* 28 */
+ masked = (high_rc >> 16) & MASK6; /* 24 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 3;
+ *(--pointers[masked]) = chrpos - 24;
}
}
- masked = (oligo >> 2) & MASK7; /* 27 */
+ masked = (high_rc >> 18) & MASK6; /* 25 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 4;
+ *(--pointers[masked]) = chrpos - 25;
}
}
- masked = oligo & MASK7; /* 26 */
+ masked = high_rc >> 20; /* 26, No mask necessary */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 5;
+ *(--pointers[masked]) = chrpos - 26;
}
}
+#else
+ _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask6);
- masked = high_rc >> 18; /* 25, No mask necessary */
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 6;
+ *(--pointers[masked]) = chrpos - 16;
}
}
- masked = (high_rc >> 16) & MASK7; /* 24 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 7;
+ *(--pointers[masked]) = chrpos - 17;
}
}
- masked = (high_rc >> 14) & MASK7; /* 23 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 8;
+ *(--pointers[masked]) = chrpos - 18;
}
}
- masked = (high_rc >> 12) & MASK7; /* 22 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 9;
+ *(--pointers[masked]) = chrpos - 19;
}
}
- masked = (high_rc >> 10) & MASK7; /* 21 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask6);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 10;
+ *(--pointers[masked]) = chrpos - 20;
}
}
- masked = (high_rc >> 8) & MASK7; /* 20 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 11;
+ *(--pointers[masked]) = chrpos - 21;
}
}
- masked = (high_rc >> 6) & MASK7; /* 19 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 12;
+ *(--pointers[masked]) = chrpos - 22;
}
}
- masked = (high_rc >> 4) & MASK7; /* 18 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 13;
+ *(--pointers[masked]) = chrpos - 23;
}
}
- masked = (high_rc >> 2) & MASK7; /* 17 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask6);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 14;
+ *(--pointers[masked]) = chrpos - 24;
}
}
- masked = high_rc & MASK7; /* 16 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 15;
+ *(--pointers[masked]) = chrpos - 25;
}
}
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 26;
+ }
+ }
+#endif
- oligo = low_rc >> 20; /* For 15..10 */
- oligo |= high_rc << 12;
- masked = (oligo >> 10) & MASK7; /* 15 */
+ oligo = high_rc >> 22; /* For 31..27 */
+ oligo |= nextlow_rc << 10;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK6; /* 27 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 16;
+ *(--pointers[masked]) = chrpos - 27;
}
}
- masked = (oligo >> 8) & MASK7; /* 14 */
+ masked = (oligo >> 2) & MASK6; /* 28 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 17;
+ *(--pointers[masked]) = chrpos - 28;
}
}
- masked = (oligo >> 6) & MASK7; /* 13 */
+ masked = (oligo >> 4) & MASK6; /* 29 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 18;
+ *(--pointers[masked]) = chrpos - 29;
}
}
- masked = (oligo >> 4) & MASK7; /* 12 */
+ masked = (oligo >> 6) & MASK6; /* 30 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 19;
+ *(--pointers[masked]) = chrpos - 30;
}
}
- masked = (oligo >> 2) & MASK7; /* 11 */
+ masked = (oligo >> 8) & MASK6; /* 31 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 20;
+ *(--pointers[masked]) = chrpos - 31;
}
}
- masked = oligo & MASK7; /* 10 */
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask6);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 21;
+ *(--pointers[masked]) = chrpos - 27;
}
}
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 28;
+ }
+ }
- masked = low_rc >> 18; /* 9, No mask necessary */
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 29;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 30;
+ }
+ }
+
+
+ masked = (oligo >> 8) & MASK6; /* 31 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 22;
+ *(--pointers[masked]) = chrpos - 31;
}
}
+#endif
+
+ return chrpos - 32;
+}
+
+
+static void
+count_5mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+ Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rc & MASK5; /* 0 */
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 2) & MASK5; /* 1 */
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 4) & MASK5; /* 2 */
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 6) & MASK5; /* 3 */
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 8) & MASK5; /* 4 */
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 10) & MASK5; /* 5 */
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 12) & MASK5; /* 6 */
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 14) & MASK5; /* 7 */
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 16) & MASK5; /* 8 */
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 18) & MASK5; /* 9 */
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+ masked = (low_rc >> 20) & MASK5; /* 10 */
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+ masked = low_rc >> 22; /* 11, No mask necessary */
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+#else
+ _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("7 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 16) & MASK7; /* 8 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 23;
- }
- }
- masked = (low_rc >> 14) & MASK7; /* 7 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 24;
- }
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
- masked = (low_rc >> 12) & MASK7; /* 6 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 25;
- }
- }
+ masked = _mm_extract_epi32(_masked,0);
+ counts[masked] += 1;
+ debug(printf("8 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 10) & MASK7; /* 5 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 26;
- }
- }
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 8) & MASK7; /* 4 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 27;
- }
- }
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 6) & MASK7; /* 3 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 28;
- }
- }
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("11 %04X => %d\n",masked,counts[masked]));
+#endif
- masked = (low_rc >> 4) & MASK7; /* 2 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 29;
- }
- }
- masked = (low_rc >> 2) & MASK7; /* 1 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 30;
- }
- }
+ oligo = low_rc >> 24; /* For 15..12 */
+ oligo |= high_rc << 8;
- masked = low_rc & MASK7; /* 0 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 31;
- }
- }
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK5; /* 12 */
+ counts[masked] += 1;
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
- return chrpos + 32;
-}
+ masked = (oligo >> 2) & MASK5; /* 13 */
+ counts[masked] += 1;
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
+ masked = (oligo >> 4) & MASK5; /* 14 */
+ counts[masked] += 1;
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
-static void
-count_6mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
- Genomecomp_T masked, oligo;
+ masked = (oligo >> 6) & MASK5; /* 15 */
+ counts[masked] += 1;
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
- oligo = high_rc >> 22; /* For 31..27 */
- oligo |= nextlow_rc << 10;
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
- masked = (oligo >> 8) & MASK6; /* 31 */
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("31 %04X => %d\n",masked,counts[masked]));
+ debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 6) & MASK6; /* 30 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("30 %04X => %d\n",masked,counts[masked]));
+ debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 4) & MASK6; /* 29 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("29 %04X => %d\n",masked,counts[masked]));
+ debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 2) & MASK6; /* 28 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("28 %04X => %d\n",masked,counts[masked]));
+ debug(printf("15 %04X => %d\n",masked,counts[masked]));
+#endif
- masked = oligo & MASK6; /* 27 */
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rc & MASK5; /* 16 */
counts[masked] += 1;
- debug(printf("27 %04X => %d\n",masked,counts[masked]));
+ debug(printf("16 %04X => %d\n",masked,counts[masked]));
+ masked = (high_rc >> 2) & MASK5; /* 17 */
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = high_rc >> 20; /* 26, No mask necessary */
+ masked = (high_rc >> 4) & MASK5; /* 18 */
counts[masked] += 1;
- debug(printf("26 %04X => %d\n",masked,counts[masked]));
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 18) & MASK6; /* 25 */
+ masked = (high_rc >> 6) & MASK5; /* 19 */
counts[masked] += 1;
- debug(printf("25 %04X => %d\n",masked,counts[masked]));
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 16) & MASK6; /* 24 */
+ masked = (high_rc >> 8) & MASK5; /* 20 */
counts[masked] += 1;
- debug(printf("24 %04X => %d\n",masked,counts[masked]));
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 14) & MASK6; /* 23 */
+ masked = (high_rc >> 10) & MASK5; /* 21 */
counts[masked] += 1;
- debug(printf("23 %04X => %d\n",masked,counts[masked]));
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 12) & MASK6; /* 22 */
+ masked = (high_rc >> 12) & MASK5; /* 22 */
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 10) & MASK6; /* 21 */
+ masked = (high_rc >> 14) & MASK5; /* 23 */
counts[masked] += 1;
- debug(printf("21 %04X => %d\n",masked,counts[masked]));
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 8) & MASK6; /* 20 */
+ masked = (high_rc >> 16) & MASK5; /* 24 */
counts[masked] += 1;
- debug(printf("20 %04X => %d\n",masked,counts[masked]));
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 6) & MASK6; /* 19 */
+ masked = (high_rc >> 18) & MASK5; /* 25 */
counts[masked] += 1;
- debug(printf("19 %04X => %d\n",masked,counts[masked]));
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 4) & MASK6; /* 18 */
+ masked = (high_rc >> 20) & MASK5; /* 26 */
counts[masked] += 1;
- debug(printf("18 %04X => %d\n",masked,counts[masked]));
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = (high_rc >> 2) & MASK6; /* 17 */
+ masked = high_rc >> 22; /* 27, No mask necessary */
counts[masked] += 1;
- debug(printf("17 %04X => %d\n",masked,counts[masked]));
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = high_rc & MASK6; /* 16 */
+#else
+ _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,1);
+ counts[masked] += 1;
+ debug(printf("17 %04X => %d\n",masked,counts[masked]));
- oligo = low_rc >> 22; /* For 15..11 */
- oligo |= high_rc << 10;
+ masked = _mm_extract_epi32(_masked,2);
+ counts[masked] += 1;
+ debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 8) & MASK6; /* 15 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("15 %04X => %d\n",masked,counts[masked]));
+ debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 6) & MASK6; /* 14 */
+
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("14 %04X => %d\n",masked,counts[masked]));
+ debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 4) & MASK6; /* 13 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("13 %04X => %d\n",masked,counts[masked]));
+ debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = (oligo >> 2) & MASK6; /* 12 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("12 %04X => %d\n",masked,counts[masked]));
+ debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = oligo & MASK6; /* 11 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("11 %04X => %d\n",masked,counts[masked]));
+ debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = low_rc >> 20; /* 10, No mask necessary */
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("10 %04X => %d\n",masked,counts[masked]));
+ debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 18) & MASK6; /* 9 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("9 %04X => %d\n",masked,counts[masked]));
+ debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 16) & MASK6; /* 8 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("8 %04X => %d\n",masked,counts[masked]));
+ debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 14) & MASK6; /* 7 */
+ masked = _mm_extract_epi32(_masked,3);
counts[masked] += 1;
- debug(printf("7 %04X => %d\n",masked,counts[masked]));
+ debug(printf("27 %04X => %d\n",masked,counts[masked]));
+#endif
- masked = (low_rc >> 12) & MASK6; /* 6 */
+
+ oligo = high_rc >> 24; /* For 31..28 */
+ oligo |= nextlow_rc << 8;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK5; /* 28 */
counts[masked] += 1;
- debug(printf("6 %04X => %d\n",masked,counts[masked]));
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 10) & MASK6; /* 5 */
+ masked = (oligo >> 2) & MASK5; /* 29 */
counts[masked] += 1;
- debug(printf("5 %04X => %d\n",masked,counts[masked]));
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 8) & MASK6; /* 4 */
+ masked = (oligo >> 4) & MASK5; /* 30 */
counts[masked] += 1;
- debug(printf("4 %04X => %d\n",masked,counts[masked]));
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 6) & MASK6; /* 3 */
+ masked = (oligo >> 6) & MASK5; /* 31 */
counts[masked] += 1;
- debug(printf("3 %04X => %d\n",masked,counts[masked]));
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 4) & MASK6; /* 2 */
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
counts[masked] += 1;
- debug(printf("2 %04X => %d\n",masked,counts[masked]));
+ debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 2) & MASK6; /* 1 */
+ masked = _mm_extract_epi32(_masked,1);
counts[masked] += 1;
- debug(printf("1 %04X => %d\n",masked,counts[masked]));
+ debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = low_rc & MASK6; /* 0 */
+ masked = _mm_extract_epi32(_masked,2);
counts[masked] += 1;
- debug(printf("0 %04X => %d\n",masked,counts[masked]));
+ debug(printf("30 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,3);
+ counts[masked] += 1;
+ debug(printf("31 %04X => %d\n",masked,counts[masked]));
+#endif
return;
}
+
/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
#ifdef USE_SIMD_FOR_COUNTS
static void
-extract_6mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
+extract_5mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
__m128i oligo;
- oligo = _mm_or_si128( _mm_srli_epi32(current,22), _mm_slli_epi32(next,10));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
- _mm_store_si128(out++, _mm_and_si128( oligo, mask6));
+ oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
+ _mm_store_si128(out++, _mm_and_si128( oligo, mask5));
- _mm_store_si128(out++, _mm_srli_epi32(current,20));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask6));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask6));
- _mm_store_si128(out++, _mm_and_si128( current, mask6));
+ _mm_store_si128(out++, _mm_srli_epi32(current,22));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
+ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
+ _mm_store_si128(out++, _mm_and_si128( current, mask5));
+
+ return;
+}
+
+static void
+count_5mers_rev_simd (Count_T *counts, __m128i current, __m128i next) {
+ __m128i oligo;
+ Genomecomp_T array[4];
+
+ oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8));
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
+ counts[array[0]] += 1; /* 63 */
+ counts[array[1]] += 1; /* 47 */
+ counts[array[2]] += 1; /* 31 */
+ counts[array[3]] += 1; /* 15 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
+ counts[array[0]] += 1; /* 62 */
+ counts[array[1]] += 1; /* 46 */
+ counts[array[2]] += 1; /* 30 */
+ counts[array[3]] += 1; /* 14 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
+ counts[array[0]] += 1; /* 61 */
+ counts[array[1]] += 1; /* 45 */
+ counts[array[2]] += 1; /* 29 */
+ counts[array[3]] += 1; /* 13 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask5));
+ counts[array[0]] += 1; /* 60 */
+ counts[array[1]] += 1; /* 44 */
+ counts[array[2]] += 1; /* 28 */
+ counts[array[3]] += 1; /* 12 */
+
+
+ _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,22));
+ counts[array[0]] += 1; /* 59 */
+ counts[array[1]] += 1; /* 43 */
+ counts[array[2]] += 1; /* 27 */
+ counts[array[3]] += 1; /* 11 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
+ counts[array[0]] += 1; /* 58 */
+ counts[array[1]] += 1; /* 42 */
+ counts[array[2]] += 1; /* 26 */
+ counts[array[3]] += 1; /* 10 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
+ counts[array[0]] += 1; /* 57 */
+ counts[array[1]] += 1; /* 41 */
+ counts[array[2]] += 1; /* 25 */
+ counts[array[3]] += 1; /* 9 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
+ counts[array[0]] += 1; /* 56 */
+ counts[array[1]] += 1; /* 40 */
+ counts[array[2]] += 1; /* 24 */
+ counts[array[3]] += 1; /* 8 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
+ counts[array[0]] += 1; /* 55 */
+ counts[array[1]] += 1; /* 39 */
+ counts[array[2]] += 1; /* 23 */
+ counts[array[3]] += 1; /* 7 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
+ counts[array[0]] += 1; /* 54 */
+ counts[array[1]] += 1; /* 38 */
+ counts[array[2]] += 1; /* 22 */
+ counts[array[3]] += 1; /* 6 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
+ counts[array[0]] += 1; /* 53 */
+ counts[array[1]] += 1; /* 37 */
+ counts[array[2]] += 1; /* 21 */
+ counts[array[3]] += 1; /* 5 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
+ counts[array[0]] += 1; /* 52 */
+ counts[array[1]] += 1; /* 36 */
+ counts[array[2]] += 1; /* 20 */
+ counts[array[3]] += 1; /* 4 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
+ counts[array[0]] += 1; /* 51 */
+ counts[array[1]] += 1; /* 35 */
+ counts[array[2]] += 1; /* 19 */
+ counts[array[3]] += 1; /* 3 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
+ counts[array[0]] += 1; /* 50 */
+ counts[array[1]] += 1; /* 34 */
+ counts[array[2]] += 1; /* 18 */
+ counts[array[3]] += 1; /* 2 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
+ counts[array[0]] += 1; /* 49 */
+ counts[array[1]] += 1; /* 33 */
+ counts[array[2]] += 1; /* 17 */
+ counts[array[3]] += 1; /* 1 */
+
+ _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask5));
+ counts[array[0]] += 1; /* 48 */
+ counts[array[1]] += 1; /* 32 */
+ counts[array[2]] += 1; /* 16 */
+ counts[array[3]] += 1; /* 0 */
return;
}
@@ -15028,794 +23423,644 @@ extract_6mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
static int
-store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
+store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
+#ifndef INDIVIDUAL_SHIFTS
+ __m128i _oligo, _masked;
+#endif
- oligo = high_rc >> 22; /* For 31..27 */
- oligo |= nextlow_rc << 10;
-
- masked = (oligo >> 8) & MASK6; /* 31 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos;
- }
- }
-
- masked = (oligo >> 6) & MASK6; /* 30 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 1;
- }
- }
-
- masked = (oligo >> 4) & MASK6; /* 29 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 2;
- }
- }
-
- masked = (oligo >> 2) & MASK6; /* 28 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 3;
- }
- }
-
- masked = oligo & MASK6; /* 27 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 4;
- }
- }
-
-
- masked = high_rc >> 20; /* 26, No mask necessary */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 5;
- }
- }
-
- masked = (high_rc >> 18) & MASK6; /* 25 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 6;
- }
- }
-
- masked = (high_rc >> 16) & MASK6; /* 24 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 7;
- }
- }
-
- masked = (high_rc >> 14) & MASK6; /* 23 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 8;
- }
- }
- masked = (high_rc >> 12) & MASK6; /* 22 */
+#ifdef INDIVIDUAL_SHIFTS
+ masked = low_rc & MASK5; /* 0 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 9;
+ *(--pointers[masked]) = chrpos;
}
}
- masked = (high_rc >> 10) & MASK6; /* 21 */
+ masked = (low_rc >> 2) & MASK5; /* 1 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 10;
+ *(--pointers[masked]) = chrpos - 1;
}
}
- masked = (high_rc >> 8) & MASK6; /* 20 */
+ masked = (low_rc >> 4) & MASK5; /* 2 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 11;
+ *(--pointers[masked]) = chrpos - 2;
}
}
- masked = (high_rc >> 6) & MASK6; /* 19 */
+ masked = (low_rc >> 6) & MASK5; /* 3 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 12;
+ *(--pointers[masked]) = chrpos - 3;
}
}
- masked = (high_rc >> 4) & MASK6; /* 18 */
+ masked = (low_rc >> 8) & MASK5; /* 4 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 13;
+ *(--pointers[masked]) = chrpos - 4;
}
}
- masked = (high_rc >> 2) & MASK6; /* 17 */
+ masked = (low_rc >> 10) & MASK5; /* 5 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 14;
+ *(--pointers[masked]) = chrpos - 5;
}
}
- masked = high_rc & MASK6; /* 16 */
+ masked = (low_rc >> 12) & MASK5; /* 6 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 15;
+ *(--pointers[masked]) = chrpos - 6;
}
}
-
- oligo = low_rc >> 22; /* For 15..11 */
- oligo |= high_rc << 10;
-
- masked = (oligo >> 8) & MASK6; /* 15 */
+ masked = (low_rc >> 14) & MASK5; /* 7 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 16;
+ *(--pointers[masked]) = chrpos - 7;
}
}
- masked = (oligo >> 6) & MASK6; /* 14 */
+ masked = (low_rc >> 16) & MASK5; /* 8 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 17;
+ *(--pointers[masked]) = chrpos - 8;
}
}
- masked = (oligo >> 4) & MASK6; /* 13 */
+ masked = (low_rc >> 18) & MASK5; /* 9 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 18;
+ *(--pointers[masked]) = chrpos - 9;
}
}
- masked = (oligo >> 2) & MASK6; /* 12 */
+ masked = (low_rc >> 20) & MASK5; /* 10 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 19;
+ *(--pointers[masked]) = chrpos - 10;
}
}
- masked = oligo & MASK6; /* 11 */
+ masked = low_rc >> 22; /* 11, No mask necessary */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 20;
+ *(--pointers[masked]) = chrpos - 11;
}
}
+#else
+ _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
- masked = low_rc >> 20; /* 10, No mask necessary */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 21;
- }
- }
-
- masked = (low_rc >> 18) & MASK6; /* 9 */
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 22;
+ *(--pointers[masked]) = chrpos;
}
}
- masked = (low_rc >> 16) & MASK6; /* 8 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 23;
+ *(--pointers[masked]) = chrpos - 1;
}
}
- masked = (low_rc >> 14) & MASK6; /* 7 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 24;
+ *(--pointers[masked]) = chrpos - 2;
}
}
- masked = (low_rc >> 12) & MASK6; /* 6 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 25;
+ *(--pointers[masked]) = chrpos - 3;
}
}
- masked = (low_rc >> 10) & MASK6; /* 5 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 26;
- }
- }
- masked = (low_rc >> 8) & MASK6; /* 4 */
- if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
- counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 27;
- }
- }
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
- masked = (low_rc >> 6) & MASK6; /* 3 */
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 28;
+ *(--pointers[masked]) = chrpos - 4;
}
}
- masked = (low_rc >> 4) & MASK6; /* 2 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 29;
+ *(--pointers[masked]) = chrpos - 5;
}
}
- masked = (low_rc >> 2) & MASK6; /* 1 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 30;
+ *(--pointers[masked]) = chrpos - 6;
}
}
- masked = low_rc & MASK6; /* 0 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
- } else {
- *(pointers[masked]++) = chrpos + 31;
- }
- }
-
- return chrpos + 32;
-}
-
-
-static void
-count_5mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
- Genomecomp_T masked, oligo;
-
- oligo = high_rc >> 24; /* For 31..28 */
- oligo |= nextlow_rc << 8;
-
- masked = (oligo >> 6) & MASK5; /* 31 */
- counts[masked] += 1;
- debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 4) & MASK5; /* 30 */
- counts[masked] += 1;
- debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 2) & MASK5; /* 29 */
- counts[masked] += 1;
- debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
- masked = oligo & MASK5; /* 28 */
- counts[masked] += 1;
- debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-
- masked = high_rc >> 22; /* 27, No mask necessary */
- counts[masked] += 1;
- debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 20) & MASK5; /* 26 */
- counts[masked] += 1;
- debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 18) & MASK5; /* 25 */
- counts[masked] += 1;
- debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 16) & MASK5; /* 24 */
- counts[masked] += 1;
- debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 14) & MASK5; /* 23 */
- counts[masked] += 1;
- debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 12) & MASK5; /* 22 */
- counts[masked] += 1;
- debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 10) & MASK5; /* 21 */
- counts[masked] += 1;
- debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 8) & MASK5; /* 20 */
- counts[masked] += 1;
- debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 6) & MASK5; /* 19 */
- counts[masked] += 1;
- debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 4) & MASK5; /* 18 */
- counts[masked] += 1;
- debug(printf("18 %04X => %d\n",masked,counts[masked]));
-
- masked = (high_rc >> 2) & MASK5; /* 17 */
- counts[masked] += 1;
- debug(printf("17 %04X => %d\n",masked,counts[masked]));
-
- masked = high_rc & MASK5; /* 16 */
- counts[masked] += 1;
- debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
-
- oligo = low_rc >> 24; /* For 15..12 */
- oligo |= high_rc << 8;
-
- masked = (oligo >> 6) & MASK5; /* 15 */
- counts[masked] += 1;
- debug(printf("15 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 4) & MASK5; /* 14 */
- counts[masked] += 1;
- debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
- masked = (oligo >> 2) & MASK5; /* 13 */
- counts[masked] += 1;
- debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
- masked = oligo & MASK5; /* 12 */
- counts[masked] += 1;
- debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
-
- masked = low_rc >> 22; /* 11, No mask necessary */
- counts[masked] += 1;
- debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 20) & MASK5; /* 10 */
- counts[masked] += 1;
- debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 18) & MASK5; /* 9 */
- counts[masked] += 1;
- debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 16) & MASK5; /* 8 */
- counts[masked] += 1;
- debug(printf("8 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 14) & MASK5; /* 7 */
- counts[masked] += 1;
- debug(printf("7 %04X => %d\n",masked,counts[masked]));
-
- masked = (low_rc >> 12) & MASK5; /* 6 */
- counts[masked] += 1;
- debug(printf("6 %04X => %d\n",masked,counts[masked]));
+ } else {
+ *(--pointers[masked]) = chrpos - 7;
+ }
+ }
- masked = (low_rc >> 10) & MASK5; /* 5 */
- counts[masked] += 1;
- debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = (low_rc >> 8) & MASK5; /* 4 */
- counts[masked] += 1;
- debug(printf("4 %04X => %d\n",masked,counts[masked]));
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
- masked = (low_rc >> 6) & MASK5; /* 3 */
- counts[masked] += 1;
- debug(printf("3 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 8;
+ }
+ }
- masked = (low_rc >> 4) & MASK5; /* 2 */
- counts[masked] += 1;
- debug(printf("2 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 9;
+ }
+ }
- masked = (low_rc >> 2) & MASK5; /* 1 */
- counts[masked] += 1;
- debug(printf("1 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 10;
+ }
+ }
- masked = low_rc & MASK5; /* 0 */
- counts[masked] += 1;
- debug(printf("0 %04X => %d\n",masked,counts[masked]));
+ masked = _mm_extract_epi32(_masked,3);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 11;
+ }
+ }
+#endif
- return;
-}
+ oligo = low_rc >> 24; /* For 15..12 */
+ oligo |= high_rc << 8;
-/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
- and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_5mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
- __m128i oligo;
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK5; /* 12 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 12;
+ }
+ }
- oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
- _mm_store_si128(out++, _mm_and_si128( oligo, mask5));
+ masked = (oligo >> 2) & MASK5; /* 13 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 13;
+ }
+ }
- _mm_store_si128(out++, _mm_srli_epi32(current,22));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
- _mm_store_si128(out++, _mm_and_si128( current, mask5));
+ masked = (oligo >> 4) & MASK5; /* 14 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 14;
+ }
+ }
- return;
-}
-#endif
+ masked = (oligo >> 6) & MASK5; /* 15 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 15;
+ }
+ }
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
-static int
-store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
- Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
- Genomecomp_T masked, oligo;
+ masked = _mm_extract_epi32(_masked,0);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 12;
+ }
+ }
- oligo = high_rc >> 24; /* For 31..28 */
- oligo |= nextlow_rc << 8;
+ masked = _mm_extract_epi32(_masked,1);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 13;
+ }
+ }
- masked = (oligo >> 6) & MASK5; /* 31 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos;
+ *(--pointers[masked]) = chrpos - 14;
}
}
- masked = (oligo >> 4) & MASK5; /* 30 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 1;
+ *(--pointers[masked]) = chrpos - 15;
}
}
+#endif
- masked = (oligo >> 2) & MASK5; /* 29 */
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = high_rc & MASK5; /* 16 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 2;
+ *(--pointers[masked]) = chrpos - 16;
}
}
- masked = oligo & MASK5; /* 28 */
+ masked = (high_rc >> 2) & MASK5; /* 17 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 3;
+ *(--pointers[masked]) = chrpos - 17;
}
}
+ masked = (high_rc >> 4) & MASK5; /* 18 */
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
- masked = high_rc >> 22; /* 27, No mask necessary */
+ masked = (high_rc >> 6) & MASK5; /* 19 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 4;
+ *(--pointers[masked]) = chrpos - 19;
}
}
- masked = (high_rc >> 20) & MASK5; /* 26 */
+ masked = (high_rc >> 8) & MASK5; /* 20 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 5;
+ *(--pointers[masked]) = chrpos - 20;
}
}
- masked = (high_rc >> 18) & MASK5; /* 25 */
+ masked = (high_rc >> 10) & MASK5; /* 21 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 6;
+ *(--pointers[masked]) = chrpos - 21;
}
}
- masked = (high_rc >> 16) & MASK5; /* 24 */
+ masked = (high_rc >> 12) & MASK5; /* 22 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 7;
+ *(--pointers[masked]) = chrpos - 22;
}
}
masked = (high_rc >> 14) & MASK5; /* 23 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 8;
+ *(--pointers[masked]) = chrpos - 23;
}
}
- masked = (high_rc >> 12) & MASK5; /* 22 */
+ masked = (high_rc >> 16) & MASK5; /* 24 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 9;
+ *(--pointers[masked]) = chrpos - 24;
}
}
- masked = (high_rc >> 10) & MASK5; /* 21 */
+ masked = (high_rc >> 18) & MASK5; /* 25 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 10;
+ *(--pointers[masked]) = chrpos - 25;
}
}
- masked = (high_rc >> 8) & MASK5; /* 20 */
+ masked = (high_rc >> 20) & MASK5; /* 26 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 11;
+ *(--pointers[masked]) = chrpos - 26;
}
}
- masked = (high_rc >> 6) & MASK5; /* 19 */
+ masked = high_rc >> 22; /* 27, No mask necessary */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 12;
+ *(--pointers[masked]) = chrpos - 27;
}
}
- masked = (high_rc >> 4) & MASK5; /* 18 */
+#else
+ _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 13;
+ *(--pointers[masked]) = chrpos - 16;
}
}
- masked = (high_rc >> 2) & MASK5; /* 17 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 14;
+ *(--pointers[masked]) = chrpos - 17;
}
}
- masked = high_rc & MASK5; /* 16 */
+ masked = _mm_extract_epi32(_masked,2);
+ if (counts[masked]) {
+ if (pointers[masked] == positions[masked]) {
+ counts[masked] = 0;
+ } else {
+ *(--pointers[masked]) = chrpos - 18;
+ }
+ }
+
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 15;
+ *(--pointers[masked]) = chrpos - 19;
}
}
- oligo = low_rc >> 24; /* For 15..12 */
- oligo |= high_rc << 8;
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
- masked = (oligo >> 6) & MASK5; /* 15 */
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 16;
+ *(--pointers[masked]) = chrpos - 20;
}
}
- masked = (oligo >> 4) & MASK5; /* 14 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 17;
+ *(--pointers[masked]) = chrpos - 21;
}
}
- masked = (oligo >> 2) & MASK5; /* 13 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 18;
+ *(--pointers[masked]) = chrpos - 22;
}
}
- masked = oligo & MASK5; /* 12 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 19;
+ *(--pointers[masked]) = chrpos - 23;
}
}
- masked = low_rc >> 22; /* 11, No mask necessary */
+ _oligo = _mm_srli_epi32(_oligo, 8);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 20;
+ *(--pointers[masked]) = chrpos - 24;
}
}
- masked = (low_rc >> 20) & MASK5; /* 10 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 21;
+ *(--pointers[masked]) = chrpos - 25;
}
}
- masked = (low_rc >> 18) & MASK5; /* 9 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 22;
+ *(--pointers[masked]) = chrpos - 26;
}
}
- masked = (low_rc >> 16) & MASK5; /* 8 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 23;
+ *(--pointers[masked]) = chrpos - 27;
}
}
+#endif
- masked = (low_rc >> 14) & MASK5; /* 7 */
+
+ oligo = high_rc >> 24; /* For 31..28 */
+ oligo |= nextlow_rc << 8;
+
+#ifdef INDIVIDUAL_SHIFTS
+ masked = oligo & MASK5; /* 28 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 24;
+ *(--pointers[masked]) = chrpos - 28;
}
}
- masked = (low_rc >> 12) & MASK5; /* 6 */
+ masked = (oligo >> 2) & MASK5; /* 29 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 25;
+ *(--pointers[masked]) = chrpos - 29;
}
}
- masked = (low_rc >> 10) & MASK5; /* 5 */
+ masked = (oligo >> 4) & MASK5; /* 30 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 26;
+ *(--pointers[masked]) = chrpos - 30;
}
}
- masked = (low_rc >> 8) & MASK5; /* 4 */
+ masked = (oligo >> 6) & MASK5; /* 31 */
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 27;
+ *(--pointers[masked]) = chrpos - 31;
}
}
- masked = (low_rc >> 6) & MASK5; /* 3 */
+#else
+ _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+ _masked = _mm_and_si128(_oligo, mask5);
+
+ masked = _mm_extract_epi32(_masked,0);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 28;
+ *(--pointers[masked]) = chrpos - 28;
}
}
- masked = (low_rc >> 4) & MASK5; /* 2 */
+ masked = _mm_extract_epi32(_masked,1);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 29;
+ *(--pointers[masked]) = chrpos - 29;
}
}
- masked = (low_rc >> 2) & MASK5; /* 1 */
+ masked = _mm_extract_epi32(_masked,2);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 30;
+ *(--pointers[masked]) = chrpos - 30;
}
}
- masked = low_rc & MASK5; /* 0 */
+ masked = _mm_extract_epi32(_masked,3);
if (counts[masked]) {
- if (pointers[masked] == positions[masked/*+1*/]) {
+ if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
} else {
- *(pointers[masked]++) = chrpos + 31;
+ *(--pointers[masked]) = chrpos - 31;
}
}
+#endif
- return chrpos + 32;
+ return chrpos - 32;
}
-#if (!defined(USE_SIMD_FOR_COUNTS) || defined(DEBUG14))
+#ifndef USE_SIMD_FOR_COUNTS
static void
count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univcoord_T left_plus_length,
int genestrand) {
@@ -15825,16 +24070,11 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
debug(printf("Starting count_positions_rev_std\n"));
-#if 0
- /* No. This extends past the query */
- if (left != 0U) {
- left -= 1; /* Needed to get last oligomer to match */
- }
-#endif
+
left_plus_length -= indexsize;
- startptr = left/32U*3;
- ptr = endptr = left_plus_length/32U*3;
+ ptr = startptr = left/32U*3;
+ endptr = left_plus_length/32U*3;
startdiscard = left % 32; /* (left+pos5) % 32 */
enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
@@ -15865,7 +24105,9 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
high_rc = ~high;
nextlow_rc = ~nextlow;
- if (indexsize == 8) {
+ if (indexsize == 9) {
+ count_9mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ } else if (indexsize == 8) {
count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
} else if (indexsize == 7) {
count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
@@ -15881,6 +24123,7 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
} else {
/* Genome_print_blocks(ref_blocks,left,left+16); */
+ /* Start block */
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -15904,22 +24147,55 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
high_rc = ~high;
nextlow_rc = ~nextlow;
- if (indexsize == 8) {
- count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ if (indexsize == 9) {
+ count_9mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 8) {
+ count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 7) {
- count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 6) {
- count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 5) {
- count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
}
- ptr -= 3;
+ ptr += 3;
+
+ /* Middle blocks */
+ if (indexsize == 9) {
+ while (ptr + 3 <= endptr) {
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ low_rc = ~low;
+ high_rc = ~high;
+ nextlow_rc = ~nextlow;
+
+ count_9mers_rev(counts,low_rc,high_rc,nextlow_rc);
+ ptr += 3;
+ }
- if (indexsize == 8) {
- while (ptr > startptr) {
+ } else if (indexsize == 8) {
+ while (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -15944,10 +24220,11 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
nextlow_rc = ~nextlow;
count_8mers_rev(counts,low_rc,high_rc,nextlow_rc);
- ptr -= 3;
+ ptr += 3;
}
+
} else if (indexsize == 7) {
- while (ptr > startptr) {
+ while (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -15972,10 +24249,11 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
nextlow_rc = ~nextlow;
count_7mers_rev(counts,low_rc,high_rc,nextlow_rc);
- ptr -= 3;
+ ptr += 3;
}
+
} else if (indexsize == 6) {
- while (ptr > startptr) {
+ while (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -16000,10 +24278,11 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
nextlow_rc = ~nextlow;
count_6mers_rev(counts,low_rc,high_rc,nextlow_rc);
- ptr -= 3;
+ ptr += 3;
}
+
} else if (indexsize == 5) {
- while (ptr > startptr) {
+ while (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -16028,13 +24307,16 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
nextlow_rc = ~nextlow;
count_5mers_rev(counts,low_rc,high_rc,nextlow_rc);
- ptr -= 3;
+ ptr += 3;
}
} else {
abort();
}
+ /* End block */
+ assert(ptr == endptr);
+
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -16058,18 +24340,20 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
high_rc = ~high;
nextlow_rc = ~nextlow;
- if (indexsize == 8) {
- count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ if (indexsize == 9) {
+ count_9mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 8) {
+ count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 7) {
- count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 6) {
- count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 5) {
- count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
}
+
}
return;
@@ -16079,68 +24363,67 @@ count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univc
#ifdef USE_SIMD_FOR_COUNTS
static void
-count_positions_rev_simd (Count_T *counts, int indexsize, Univcoord_T left, Univcoord_T left_plus_length,
- int genestrand) {
+count_positions_rev_simd (Count_T *counts, int indexsize,
+ Univcoord_T left, Univcoord_T left_plus_length, int genestrand) {
int startdiscard, enddiscard;
- Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc,
- low, high, nextlow;
- Genomecomp_T low1_rc, high1_rc, low0, high0, low1, high1;
- __m128i current, next;
- __m128i array[16];
+ Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow;
+ Genomecomp_T low1_rc, high1_rc, high0, low1, high1;
+ __m128i current, next, invert3;
+ /* __m128i array[16]; */
#ifdef HAVE_SSE4_1
__m128i temp;
+#else
+ Genomecomp_T low0_rc, high0_rc;
#endif
debug(printf("Starting count_positions_rev_simd\n"));
-#if 0
- /* No. This extends past the query */
- if (left != 0U) {
- left -= 1; /* Needed to get last oligomer to match */
- }
-#endif
left_plus_length -= indexsize;
- startptr = left/32U*3;
- ptr = endptr = left_plus_length/32U*3;
+ ptr = startptr = left/32U*3;
+ endptr = left_plus_length/32U*3;
startdiscard = left % 32; /* (left+pos5) % 32 */
enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+ invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
+
if (left_plus_length <= left) {
/* Skip */
} else if (startptr == endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr];
+ low1 = ref_blocks[ptr+1];
nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
}
}
- low_rc = ~low;
- high_rc = ~high;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
nextlow_rc = ~nextlow;
- if (indexsize == 8) {
- count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ if (indexsize == 9) {
+ count_9mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+ } else if (indexsize == 8) {
+ count_8mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
} else if (indexsize == 7) {
- count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ count_7mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
} else if (indexsize == 6) {
- count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ count_6mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
} else if (indexsize == 5) {
- count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ count_5mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
} else {
fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
@@ -16149,375 +24432,461 @@ count_positions_rev_simd (Count_T *counts, int indexsize, Univcoord_T left, Univ
} else {
/* Genome_print_blocks(ref_blocks,left,left+16); */
+ /* Start block */
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr];
+ low1 = ref_blocks[ptr+1];
nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
}
}
- low_rc = ~low;
- high_rc = ~high;
nextlow_rc = ~nextlow;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
- if (indexsize == 8) {
- count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ if (indexsize == 9) {
+ count_9mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 8) {
+ count_8mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 7) {
- count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ count_7mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 6) {
- count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ count_6mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 5) {
- count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ count_5mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
}
- if (indexsize == 8) {
- while (ptr > startptr + 6) {
- ptr -= 6;
+ ptr += 3;
+ /* Middle blocks */
+ if (indexsize == 9) {
+ while (ptr + 6 <= endptr) {
#ifdef WORDS_BIGENDIAN
high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
#else
high0 = ref_blocks[ptr];
- low0 = ref_blocks[ptr+1];
+ /* low0 = ref_blocks[ptr+1]; */
high1 = ref_blocks[ptr+3];
low1 = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ nextlow = ref_blocks[ptr+7];
#endif
if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- /* nextlow = Cmet_reduce_ct(nextlow); */
+ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nextlow_rc,0x03);
+ next = _mm_shuffle_epi32(temp,0x93);
+#else
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
+#endif
+
+#if 0
+ extract_9mers_rev_simd(array,current,next);
+ count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_9mers_rev_simd(counts,current,next);
+#endif
+ ptr += 6;
+ }
+
+ if (ptr + 3 <= endptr) {
+#ifdef WORDS_BIGENDIAN
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low0;
- high_rc = ~high0;
- low1_rc = ~low1;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
+
+ nextlow_rc = ~nextlow;
high1_rc = ~high1;
- /* Use _set_ and not _setr_ */
- current = _mm_set_epi32(low_rc,high_rc,low1_rc,high1_rc);
+ count_9mers_rev(counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
+ }
+
+ } else if (indexsize == 8) {
+ while (ptr + 6 <= endptr) {
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+#else
+ high0 = ref_blocks[ptr];
+ /* low0 = ref_blocks[ptr+1]; */
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ nextlow = ref_blocks[ptr+7];
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ nextlow = Cmet_reduce_ga(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- next = _mm_set_epi32(high_rc,low1_rc,high1_rc,nextlow_rc);
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
+#if 0
extract_8mers_rev_simd(array,current,next);
count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_8mers_rev_simd(counts,current,next);
+#endif
+ ptr += 6;
}
- if (ptr == startptr + 3) {
- ptr = startptr; /* ptr -= 3; */ /* ptr is now startptr */
- } else {
- ptr = startptr; /* ptr -= 6; */
-
+ if (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr+3];
- low = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow; */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- count_8mers_rev(counts,low_rc,high_rc,nextlow_rc);
- /* ptr already at startptr */
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ count_8mers_rev(counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
}
} else if (indexsize == 7) {
- while (ptr > startptr + 6) {
- ptr -= 6;
-
+ while (ptr + 6 <= endptr) {
#ifdef WORDS_BIGENDIAN
high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
#else
high0 = ref_blocks[ptr];
- low0 = ref_blocks[ptr+1];
+ /* low0 = ref_blocks[ptr+1]; */
high1 = ref_blocks[ptr+3];
low1 = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ nextlow = ref_blocks[ptr+7];
#endif
if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- /* nextlow = Cmet_reduce_ct(nextlow); */
+ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low0;
- high_rc = ~high0;
- low1_rc = ~low1;
- high1_rc = ~high1;
-
- /* Use _set_ and not _setr_ */
- current = _mm_set_epi32(low_rc,high_rc,low1_rc,high1_rc);
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- next = _mm_set_epi32(high_rc,low1_rc,high1_rc,nextlow_rc);
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
+#if 0
extract_7mers_rev_simd(array,current,next);
count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_7mers_rev_simd(counts,current,next);
+#endif
+ ptr += 6;
}
- if (ptr == startptr + 3) {
- ptr = startptr; /* ptr -= 3; */ /* ptr is now startptr */
- } else {
- ptr = startptr; /* ptr -= 6; */
-
+ if (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr+3];
- low = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- count_7mers_rev(counts,low_rc,high_rc,nextlow_rc);
- /* ptr already at startptr */
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ count_7mers_rev(counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
}
} else if (indexsize == 6) {
- while (ptr > startptr + 6) {
- ptr -= 6;
-
+ while (ptr + 6 <= endptr) {
#ifdef WORDS_BIGENDIAN
high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
#else
high0 = ref_blocks[ptr];
- low0 = ref_blocks[ptr+1];
+ /* low0 = ref_blocks[ptr+1]; */
high1 = ref_blocks[ptr+3];
low1 = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ nextlow = ref_blocks[ptr+7];
#endif
if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- /* nextlow = Cmet_reduce_ct(nextlow); */
+ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low0;
- high_rc = ~high0;
- low1_rc = ~low1;
- high1_rc = ~high1;
-
- /* Use _set_ and not _setr_ */
- current = _mm_set_epi32(low_rc,high_rc,low1_rc,high1_rc);
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- next = _mm_set_epi32(high_rc,low1_rc,high1_rc,nextlow_rc);
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
+#if 0
extract_6mers_rev_simd(array,current,next);
count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_6mers_rev_simd(counts,current,next);
+#endif
+ ptr += 6;
}
- if (ptr == startptr + 3) {
- ptr = startptr; /* ptr -= 3; */ /* ptr is now startptr */
- } else {
- ptr = startptr; /* ptr -= 6; */
-
+ if (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr+3];
- low = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- count_6mers_rev(counts,low_rc,high_rc,nextlow_rc);
- /* ptr already at startptr */
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ count_6mers_rev(counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
}
} else if (indexsize == 5) {
- while (ptr > startptr + 6) {
- ptr -= 6;
-
+ while (ptr + 6 <= endptr) {
#ifdef WORDS_BIGENDIAN
high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
#else
high0 = ref_blocks[ptr];
- low0 = ref_blocks[ptr+1];
+ /* low0 = ref_blocks[ptr+1]; */
high1 = ref_blocks[ptr+3];
low1 = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ nextlow = ref_blocks[ptr+7];
#endif
if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- /* nextlow = Cmet_reduce_ct(nextlow); */
+ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low0;
- high_rc = ~high0;
- low1_rc = ~low1;
- high1_rc = ~high1;
-
- /* Use _set_ and not _setr_ */
- current = _mm_set_epi32(low_rc,high_rc,low1_rc,high1_rc);
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- next = _mm_set_epi32(high_rc,low1_rc,high1_rc,nextlow_rc);
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
+#if 0
extract_5mers_rev_simd(array,current,next);
count_fwdrev_simd(counts,(Genomecomp_T *) array);
+#else
+ count_5mers_rev_simd(counts,current,next);
+#endif
+ ptr += 6;
}
- if (ptr == startptr + 3) {
- ptr = startptr; /* ptr -= 3; */ /* ptr is now startptr */
- } else {
- ptr = startptr; /* ptr -= 6; */
-
+ if (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr+3];
- low = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- count_5mers_rev(counts,low_rc,high_rc,nextlow_rc);
- /* ptr already at startptr */
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ count_5mers_rev(counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
}
} else {
@@ -16525,39 +24894,45 @@ count_positions_rev_simd (Count_T *counts, int indexsize, Univcoord_T left, Univ
}
+ /* End block */
+ assert(ptr == endptr);
+
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- /* nextlow = ref_blocks[ptr+4]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- if (indexsize == 8) {
- count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ if (indexsize == 9) {
+ count_9mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 8) {
+ count_8mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 7) {
- count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ count_7mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 6) {
- count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ count_6mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 5) {
- count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ count_5mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
}
}
@@ -16567,7 +24942,7 @@ count_positions_rev_simd (Count_T *counts, int indexsize, Univcoord_T left, Univ
#endif
-#if (!defined(USE_SIMD_FOR_COUNTS) || defined(DEBUG14))
+#ifndef USE_SIMD_FOR_COUNTS
static void
store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts, int indexsize,
Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
@@ -16577,16 +24952,11 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
low, high, nextlow;
-#if 0
- /* No. This extends past the query */
- if (left != 0U) {
- left -= 1; /* Needed to get last oligomer to match */
- }
-#endif
left_plus_length -= indexsize;
+ chrpos += (left_plus_length - left); /* We are starting from the right */
- startptr = left/32U*3;
- ptr = endptr = left_plus_length/32U*3;
+ ptr = startptr = left/32U*3;
+ endptr = left_plus_length/32U*3;
startdiscard = left % 32; /* (left+pos5) % 32 */
enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
@@ -16617,7 +24987,9 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
high_rc = ~high;
nextlow_rc = ~nextlow;
- if (indexsize == 8) {
+ if (indexsize == 9) {
+ chrpos = store_9mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ } else if (indexsize == 8) {
chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
} else if (indexsize == 7) {
chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
@@ -16633,6 +25005,7 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
} else {
/* Genome_print_blocks(ref_blocks,left,left+16); */
+ /* Start block */
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -16656,22 +25029,55 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
high_rc = ~high;
nextlow_rc = ~nextlow;
- if (indexsize == 8) {
- chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ if (indexsize == 9) {
+ chrpos = store_9mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 8) {
+ chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 7) {
- chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 6) {
- chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 5) {
- chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
}
- ptr -= 3;
+ ptr += 3;
+
+ /* Middle blocks */
+ if (indexsize == 9) {
+ while (ptr + 3 <= endptr) {
+#ifdef WORDS_BIGENDIAN
+ high = Bigendian_convert_uint(ref_blocks[ptr]);
+ low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high = ref_blocks[ptr];
+ low = ref_blocks[ptr+1];
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ low_rc = ~low;
+ high_rc = ~high;
+ nextlow_rc = ~nextlow;
+
+ chrpos = store_9mers_rev(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
+ ptr += 3;
+ }
- if (indexsize == 8) {
- while (ptr > startptr) {
+ } else if (indexsize == 8) {
+ while (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -16696,11 +25102,11 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
nextlow_rc = ~nextlow;
chrpos = store_8mers_rev(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
- ptr -= 3;
+ ptr += 3;
}
} else if (indexsize == 7) {
- while (ptr > startptr) {
+ while (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -16725,11 +25131,11 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
nextlow_rc = ~nextlow;
chrpos = store_7mers_rev(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
- ptr -= 3;
+ ptr += 3;
}
} else if (indexsize == 6) {
- while (ptr > startptr) {
+ while (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -16754,11 +25160,11 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
nextlow_rc = ~nextlow;
chrpos = store_6mers_rev(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
- ptr -= 3;
+ ptr += 3;
}
} else if (indexsize == 5) {
- while (ptr > startptr) {
+ while (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -16783,13 +25189,17 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
nextlow_rc = ~nextlow;
chrpos = store_5mers_rev(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
- ptr -= 3;
+ ptr += 3;
}
+
} else {
abort();
}
+ /* End block */
+ assert(ptr == endptr);
+
#ifdef WORDS_BIGENDIAN
high = Bigendian_convert_uint(ref_blocks[ptr]);
low = Bigendian_convert_uint(ref_blocks[ptr+1]);
@@ -16813,16 +25223,17 @@ store_positions_rev_std (Chrpos_T **pointers, Chrpos_T **positions, Count_T *cou
high_rc = ~high;
nextlow_rc = ~nextlow;
- if (indexsize == 8) {
- chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ if (indexsize == 9) {
+ chrpos = store_9mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 8) {
+ chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 7) {
- chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 6) {
- chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 5) {
- chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
}
}
@@ -16837,64 +25248,64 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
int genestrand) {
int startdiscard, enddiscard;
- Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc,
- low, high, nextlow;
- Genomecomp_T low1_rc, high1_rc, low0, high0, low1, high1;
- __m128i current, next;
+ Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow;
+ Genomecomp_T low1_rc, high1_rc, high0, low1, high1;
+ __m128i current, next, invert3;
__m128i array[16];
#ifdef HAVE_SSE4_1
__m128i temp;
+#else
+ Genomecomp_T low0_rc, high0_rc;
#endif
-#if 0
- /* No. This extends past the query */
- if (left != 0U) {
- left -= 1; /* Needed to get last oligomer to match */
- }
-#endif
left_plus_length -= indexsize;
+ chrpos += (left_plus_length - left); /* We are starting from the right */
- startptr = left/32U*3;
- ptr = endptr = left_plus_length/32U*3;
+ ptr = startptr = left/32U*3;
+ endptr = left_plus_length/32U*3;
startdiscard = left % 32; /* (left+pos5) % 32 */
enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+ invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
+
if (left_plus_length <= left) {
/* Skip */
} else if (startptr == endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr];
+ low1 = ref_blocks[ptr+1];
nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
}
}
- low_rc = ~low;
- high_rc = ~high;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
nextlow_rc = ~nextlow;
- if (indexsize == 8) {
- chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ if (indexsize == 9) {
+ chrpos = store_9mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+ } else if (indexsize == 8) {
+ chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
} else if (indexsize == 7) {
- chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
} else if (indexsize == 6) {
- chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
} else if (indexsize == 5) {
- chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+ chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
} else {
fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
@@ -16903,375 +25314,441 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
} else {
/* Genome_print_blocks(ref_blocks,left,left+16); */
+ /* Start block */
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
+ high1 = ref_blocks[ptr];
+ low1 = ref_blocks[ptr+1];
nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
}
}
- low_rc = ~low;
- high_rc = ~high;
nextlow_rc = ~nextlow;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
- if (indexsize == 8) {
- chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ if (indexsize == 9) {
+ chrpos = store_9mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ } else if (indexsize == 8) {
+ chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 7) {
- chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 6) {
- chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else if (indexsize == 5) {
- chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
} else {
+ fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
}
- if (indexsize == 8) {
- while (ptr > startptr + 6) {
- ptr -= 6;
+ ptr += 3;
+ /* Middle blocks */
+ if (indexsize == 9) {
+ while (ptr + 6 <= endptr) {
#ifdef WORDS_BIGENDIAN
high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
#else
high0 = ref_blocks[ptr];
- low0 = ref_blocks[ptr+1];
+ /* low0 = ref_blocks[ptr+1]; */
high1 = ref_blocks[ptr+3];
low1 = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ nextlow = ref_blocks[ptr+7];
#endif
if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- /* nextlow = Cmet_reduce_ct(nextlow); */
+ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
+#ifdef HAVE_SSE4_1
+ temp = _mm_insert_epi32(current,nextlow_rc,0x03);
+ next = _mm_shuffle_epi32(temp,0x93);
+#else
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
+#endif
+
+ extract_9mers_rev_simd(array,current,next);
+ chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ ptr += 6;
+ }
+
+ if (ptr + 3 <= endptr) {
+#ifdef WORDS_BIGENDIAN
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
+#endif
+ if (mode == CMET_STRANDED) {
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low0;
- high_rc = ~high0;
- low1_rc = ~low1;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
+
+ nextlow_rc = ~nextlow;
high1_rc = ~high1;
- /* Use _set_ and not _setr_ */
- current = _mm_set_epi32(low_rc,high_rc,low1_rc,high1_rc);
+ chrpos = store_9mers_rev(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
+ }
+
+ } else if (indexsize == 8) {
+ while (ptr + 6 <= endptr) {
+#ifdef WORDS_BIGENDIAN
+ high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
+ low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+#else
+ high0 = ref_blocks[ptr];
+ /* low0 = ref_blocks[ptr+1]; */
+ high1 = ref_blocks[ptr+3];
+ low1 = ref_blocks[ptr+4];
+ nextlow = ref_blocks[ptr+7];
+#endif
+ if (mode == CMET_STRANDED) {
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ nextlow = Cmet_reduce_ga(nextlow);
+ } else if (mode == CMET_NONSTRANDED) {
+ if (genestrand > 0) {
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
+ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+ nextlow = Cmet_reduce_ct(nextlow);
+ } else {
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
+ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+ nextlow = Cmet_reduce_ga(nextlow);
+ }
+ }
+
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- next = _mm_set_epi32(high_rc,low1_rc,high1_rc,nextlow_rc);
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
extract_8mers_rev_simd(array,current,next);
chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ ptr += 6;
}
- if (ptr == startptr + 3) {
- ptr = startptr; /* ptr -= 3; */ /* ptr is now startptr */
- } else {
- ptr = startptr; /* ptr -= 6; */
-
+ if (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr+3];
- low = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- chrpos = store_8mers_rev(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
- /* ptr already at startptr */
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ chrpos = store_8mers_rev(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
}
} else if (indexsize == 7) {
- while (ptr > startptr + 6) {
- ptr -= 6;
-
+ while (ptr + 6 <= endptr) {
#ifdef WORDS_BIGENDIAN
high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
#else
high0 = ref_blocks[ptr];
- low0 = ref_blocks[ptr+1];
+ /* low0 = ref_blocks[ptr+1]; */
high1 = ref_blocks[ptr+3];
low1 = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ nextlow = ref_blocks[ptr+7];
#endif
if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- /* nextlow = Cmet_reduce_ct(nextlow); */
+ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low0;
- high_rc = ~high0;
- low1_rc = ~low1;
- high1_rc = ~high1;
-
- /* Use _set_ and not _setr_ */
- current = _mm_set_epi32(low_rc,high_rc,low1_rc,high1_rc);
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- next = _mm_set_epi32(high_rc,low1_rc,high1_rc,nextlow_rc);
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
extract_7mers_rev_simd(array,current,next);
chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ ptr += 6;
}
- if (ptr == startptr + 3) {
- ptr = startptr; /* ptr -= 3; */ /* ptr is now startptr */
- } else {
- ptr = startptr; /* ptr -= 6; */
-
+ if (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);*/
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr+3];
- low = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- chrpos = store_7mers_rev(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
- /* ptr already at startptr */
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ chrpos = store_7mers_rev(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
}
} else if (indexsize == 6) {
- while (ptr > startptr + 6) {
- ptr -= 6;
-
+ while (ptr + 6 <= endptr) {
#ifdef WORDS_BIGENDIAN
high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
#else
high0 = ref_blocks[ptr];
- low0 = ref_blocks[ptr+1];
+ /* low0 = ref_blocks[ptr+1]; */
high1 = ref_blocks[ptr+3];
low1 = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ nextlow = ref_blocks[ptr+7];
#endif
if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- /* nextlow = Cmet_reduce_ct(nextlow); */
+ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low0;
- high_rc = ~high0;
- low1_rc = ~low1;
- high1_rc = ~high1;
-
- /* Use _set_ and not _setr_ */
- current = _mm_set_epi32(low_rc,high_rc,low1_rc,high1_rc);
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- next = _mm_set_epi32(high_rc,low1_rc,high1_rc,nextlow_rc);
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
extract_6mers_rev_simd(array,current,next);
chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ ptr += 6;
}
- if (ptr == startptr + 3) {
- ptr = startptr; /* ptr -= 3; */ /* ptr is now startptr */
- } else {
- ptr = startptr; /* ptr -= 6; */
-
+ if (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr+3];
- low = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- chrpos = store_6mers_rev(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
- /* ptr already at startptr */
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ chrpos = store_6mers_rev(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
}
} else if (indexsize == 5) {
- while (ptr > startptr + 6) {
- ptr -= 6;
-
+ while (ptr + 6 <= endptr) {
#ifdef WORDS_BIGENDIAN
high0 = Bigendian_convert_uint(ref_blocks[ptr]);
- low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+ /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
#else
high0 = ref_blocks[ptr];
- low0 = ref_blocks[ptr+1];
+ /* low0 = ref_blocks[ptr+1]; */
high1 = ref_blocks[ptr+3];
low1 = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ nextlow = ref_blocks[ptr+7];
#endif
if (mode == CMET_STRANDED) {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+ high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
- /* nextlow = Cmet_reduce_ct(nextlow); */
+ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+ high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
- /* nextlow = Cmet_reduce_ga(nextlow); */
+ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low0;
- high_rc = ~high0;
- low1_rc = ~low1;
- high1_rc = ~high1;
-
- /* Use _set_ and not _setr_ */
- current = _mm_set_epi32(low_rc,high_rc,low1_rc,high1_rc);
+ current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+ current = _mm_xor_si128(current,invert3);
+ nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- next = _mm_set_epi32(high_rc,low1_rc,high1_rc,nextlow_rc);
+ high0_rc = _mm_extract_epi32(current,2);
+ low1_rc = _mm_extract_epi32(current,1);
+ high1_rc = _mm_extract_epi32(current,0);
+ next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
extract_5mers_rev_simd(array,current,next);
chrpos = store_fwdrev_simd(chrpos,pointers,positions,counts,(Genomecomp_T *) array);
+ ptr += 6;
}
- if (ptr == startptr + 3) {
- ptr = startptr; /* ptr -= 3; */ /* ptr is now startptr */
- } else {
- ptr = startptr; /* ptr -= 6; */
-
+ if (ptr + 3 <= endptr) {
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr+3]);
- low = Bigendian_convert_uint(ref_blocks[ptr+4]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr+3];
- low = ref_blocks[ptr+4];
- /* nextlow = ref_blocks[ptr+7]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- chrpos = store_5mers_rev(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
- /* ptr already at startptr */
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ chrpos = store_5mers_rev(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
+ ptr += 3;
}
} else {
@@ -17279,39 +25756,45 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
}
+ /* End block */
+ assert(ptr == endptr);
+
#ifdef WORDS_BIGENDIAN
- high = Bigendian_convert_uint(ref_blocks[ptr]);
- low = Bigendian_convert_uint(ref_blocks[ptr+1]);
- /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+ high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+ /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
#else
- high = ref_blocks[ptr];
- low = ref_blocks[ptr+1];
- /* nextlow = ref_blocks[ptr+4]; */
+ high1 = ref_blocks[ptr];
+ /* low1 = ref_blocks[ptr+1]; */
+ nextlow = ref_blocks[ptr+4];
#endif
if (mode == CMET_STRANDED) {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
} else if (mode == CMET_NONSTRANDED) {
if (genestrand > 0) {
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); /* nextlow = Cmet_reduce_ct(nextlow); */
+ high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
} else {
- high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); /* nextlow = Cmet_reduce_ga(nextlow); */
+ high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
}
}
- nextlow_rc = low_rc; /* depended on nextlow */
- low_rc = ~low;
- high_rc = ~high;
+ /* low1_rc = ~low1; */
+ low1_rc = nextlow_rc;
- if (indexsize == 8) {
- chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ nextlow_rc = ~nextlow;
+ high1_rc = ~high1;
+
+ if (indexsize == 9) {
+ chrpos = store_9mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+ } else if (indexsize == 8) {
+ chrpos = store_8mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 7) {
- chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ chrpos = store_7mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 6) {
- chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ chrpos = store_6mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else if (indexsize == 5) {
- chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+ chrpos = store_5mers_rev_partial(chrpos,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
} else {
- fprintf(stderr,"indexsize %d not supported\n",indexsize);
abort();
}
}
@@ -17322,128 +25805,71 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
-#if 0
-/* Checks for overabundance */
-static int
-allocate_positions_check (Chrpos_T **pointers, Chrpos_T **positions,
- bool *overabundant, bool *inquery, Count_T *counts, int *relevant_counts,
- int oligospace) {
- int totalcounts;
- Chrpos_T *p;
- int overabundance_threshold;
- int n, i;
-
- n = 0;
- for (i = 0; i < oligospace; i++) {
- if (overabundant[i] == true) {
- counts[i] = 0;
- } else if (inquery[i] == false) {
- counts[i] = 0;
- } else if (counts[i] > 0) {
- relevant_counts[n++] = counts[i];
- }
- }
-
- totalcounts = 0;
- if (n < OVERABUNDANCE_CHECK) {
- for (i = 0; i < oligospace; i++) {
- totalcounts += counts[i];
- }
-
- } else {
- overabundance_threshold = Orderstat_int_pct_inplace(relevant_counts,n,OVERABUNDANCE_PCT);
- debug1(printf("overabundance threshold is %d\n",overabundance_threshold));
- if (overabundance_threshold < OVERABUNDANCE_MIN) {
- overabundance_threshold = OVERABUNDANCE_MIN;
- debug1(printf(" => resetting to %d\n",overabundance_threshold));
- }
-
- for (i = 0; i < oligospace; i++) {
- if (counts[i] > overabundance_threshold) {
- overabundant[i] = true;
- counts[i] = 0;
- } else {
- totalcounts += counts[i];
- }
- }
- }
-
- if (totalcounts == 0) {
- positions[0] = (Chrpos_T *) NULL;
- } else {
- p = (Chrpos_T *) CALLOC(totalcounts,sizeof(Chrpos_T));
- for (i = 0; i < oligospace; i++) {
- positions[i] = p;
- p += counts[i];
- }
- memcpy((void *) pointers,positions,oligospace*sizeof(Chrpos_T *));
- }
-
- return totalcounts;
-}
-#endif
-
-
#define POLY_A 0x0000
#define POLY_C 0x5555
#define POLY_G 0xAAAA
#define POLY_T 0xFFFF
-#define ONE_CHAR 1
-#define TWO_CHARS 2
-#define SIMD_NCHARS 16
-
-#define ONE_INT 4
-#define TWO_INTS 8
#define SIMD_NINTS 4
#ifdef HAVE_SSE2
-static int
+static Chrpos_T *
allocate_positions (Chrpos_T **pointers, Chrpos_T **positions,
Count_T *inquery, Count_T *counts, int oligospace,
- Shortoligomer_T mask) {
- /* int totalcounts_old; */
+ Shortoligomer_T mask, int indexsize) {
+ Chrpos_T *positions_space, **pointers_end, *p;
int totalcounts = 0;
- Chrpos_T *p;
- int i;
- __m128i *inquery_ptr, *counts_ptr, *end_ptr, zero, vec;
+ int i, j;
+ __m128i *inquery_ptr, *counts_ptr, *end_ptr, qcounts;
__m128i terms_ptr[1];
Count_T *terms;
- /* __m128i result_allocated[1]; */
- /* int *result; */
int *nskip, *nskip_ptr;
+#ifndef HAVE_SSE4_1
+ __m128i zero;
+#endif
+#if 0
+ /* Causes problems with new algorithm */
+ inquery[POLY_A & mask] = INQUERY_FALSE;
+ inquery[POLY_C & mask] = INQUERY_FALSE;
+ inquery[POLY_G & mask] = INQUERY_FALSE;
+ inquery[POLY_T & mask] = INQUERY_FALSE;
+#endif
- counts[POLY_A & mask] = 0;
- counts[POLY_C & mask] = 0;
- counts[POLY_G & mask] = 0;
- counts[POLY_T & mask] = 0;
-
- nskip_ptr = nskip = (int *) MALLOCA((oligospace/SIMD_NCHARS + 1) * sizeof(int));
+ /* nskip is a run-length of zero counts, which allows faster processing the second time through */
+ nskip_ptr = nskip = (int *) MALLOCA((oligospace/SIMD_NELTS + 1) * sizeof(int));
*nskip_ptr = 0;
inquery_ptr = (__m128i *) inquery;
counts_ptr = (__m128i *) counts;
- end_ptr = &(counts_ptr[oligospace/SIMD_NCHARS]);
+ end_ptr = &(counts_ptr[oligospace/SIMD_NELTS]);
terms = (Count_T *) terms_ptr;
+#ifndef HAVE_SSE4_1
zero = _mm_set1_epi8(0);
+#endif
+
+ debug(i = 0);
while (counts_ptr < end_ptr) {
- vec = _mm_and_si128(*counts_ptr,*inquery_ptr++);
- _mm_store_si128(counts_ptr++,vec);
+ debug(printf("%d\n",i));
+ debug(i += 16);
+ debug(print_counts(*counts_ptr,"counts"));
+ qcounts = _mm_and_si128(*counts_ptr,*inquery_ptr++); /* counts in query */
+ _mm_store_si128(counts_ptr++,qcounts); /* and store back, so we don't need inquery or overabundant any more */
if (
#ifdef HAVE_SSE4_1
- _mm_testz_si128(vec,vec)
+ _mm_testz_si128(qcounts,qcounts)
#else
- /*cmp*/_mm_movemask_epi8(_mm_cmpeq_epi8(vec,zero)) == 0xFFFF
+ /*cmp*/_mm_movemask_epi8(_mm_cmpeq_epi8(qcounts,zero)) == 0xFFFF
#endif
) {
/* All counts are zero, so incrementing nskip */
- (*nskip_ptr) += SIMD_NCHARS;
+ (*nskip_ptr) += 1;
+
} else {
- /* Non-zero count found */
- _mm_store_si128(terms_ptr,vec);
+ /* A valid count found */
+ _mm_store_si128(terms_ptr,qcounts);
totalcounts += terms[0] + terms[1] + terms[2] + terms[3] + terms[4] + terms[5] + terms[6] + terms[7] +
terms[8] + terms[9] + terms[10] + terms[11] + terms[12] + terms[13] + terms[14] + terms[15];
*(++nskip_ptr) = 0; /* Advance ptr and initialize */
@@ -17454,7 +25880,7 @@ allocate_positions (Chrpos_T **pointers, Chrpos_T **positions,
/* For debugging */
totalcounts_old = 0;
for (i = 0; i < oligospace; i++) {
- if (inquery[i] == /*true*/0xFF) {
+ if (inquery[i] == INQUERY_TRUE) {
totalcounts_old += counts[i];
}
}
@@ -17465,83 +25891,128 @@ allocate_positions (Chrpos_T **pointers, Chrpos_T **positions,
}
#endif
+ debug(printf("totalcounts is %d\n",totalcounts));
if (totalcounts == 0) {
- positions[0] = (Chrpos_T *) NULL;
+ positions_space = (Chrpos_T *) NULL;
} else {
/* Need to assign positions[0] so we can free the space */
- /* pointers[0] = */ positions[0] = p = (Chrpos_T *) CALLOC(totalcounts,sizeof(Chrpos_T));
+ pointers_end = &(pointers[-1]); /* or pointers_allocated[0] */
+ p = positions_space = (Chrpos_T *) MALLOC(totalcounts * sizeof(Chrpos_T));
+ i = 0;
nskip_ptr = nskip;
- i = *nskip_ptr++;
- while (i < oligospace) {
- /* starti = i; */
- pointers[i] = positions[i] = p; /* 0 */
- p += counts[i++];
- pointers[i] = positions[i] = p; /* 1 */
+ j = *nskip_ptr++;
+ while (i + j*SIMD_NELTS < oligospace) {
+#if 0
+ while (--j >= 0) {
+ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+ }
+#else
+ /* Not necessary to assign since we check for counts[i] == 0 */
+ pointers_end[i] = /* positions[i] = */ p;
+ i += j*16;
+#endif
+
+ pointers_end[i] = positions[i] = p; /* 0 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 2 */
+
+ pointers_end[i] = positions[i] = p; /* 1 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 3 */
+
+ pointers_end[i] = positions[i] = p; /* 2 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 4 */
+
+ pointers_end[i] = positions[i] = p; /* 3 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 5 */
+
+ pointers_end[i] = positions[i] = p; /* 4 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 6 */
+
+ pointers_end[i] = positions[i] = p; /* 5 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 7 */
+
+ pointers_end[i] = positions[i] = p; /* 6 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 8 */
+
+ pointers_end[i] = positions[i] = p; /* 7 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 9 */
+
+ pointers_end[i] = positions[i] = p; /* 8 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 10 */
+
+ pointers_end[i] = positions[i] = p; /* 9 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 11 */
+
+ pointers_end[i] = positions[i] = p; /* 10 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 12 */
+
+ pointers_end[i] = positions[i] = p; /* 11 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 13 */
+
+ pointers_end[i] = positions[i] = p; /* 12 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 14 */
+
+ pointers_end[i] = positions[i] = p; /* 13 */
p += counts[i++];
- pointers[i] = positions[i] = p; /* 15 */
+
+ pointers_end[i] = positions[i] = p; /* 14 */
p += counts[i++];
- positions[i] = p; /* 16, used for indicating if pointer hits next position. Do not need to copy to pointers[i] */
-#if 0
- /* Incremental call. Turns out to be slightly slower than the individual assignments above. */
- memcpy((void *) &(pointers[starti]),&(positions[starti]),16*sizeof(Chrpos_T *));
-#endif
+ pointers_end[i] = positions[i] = p; /* 15 */
+ p += counts[i++];
- i += *nskip_ptr++;
+ j = *nskip_ptr++;
}
+
#if 0
- /* Single call replaced by incremental calls above */
- /* Does not copy position[oligospace] */
- memcpy((void *) pointers,positions,oligospace*sizeof(Chrpos_T *));
+ while (--j >= 0) {
+ /* Not necessary to assign since we check for counts[i] == 0 */
+ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+ }
+#else
+ if (j > 0) {
+ pointers_end[i] = /* positions[i] = */ p;
+ /* i += j*16; */
+ }
#endif
}
+
+#if 0
+ /* Faster to assign each individual pointer above */
+ memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(Chrpos_T *));
+#endif
+ pointers[oligospace-1] = p; /* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */
+
+ /* dump_allocations(positions,counts,oligospace,indexsize,positions_space); */
FREEA(nskip);
- return totalcounts;
+ return positions_space;
}
#else
-static int
+static Chrpos_T *
allocate_positions (Chrpos_T **pointers, Chrpos_T **positions,
bool *inquery, Count_T *counts, int oligospace,
- Shortoligomer_T mask) {
+ Shortoligomer_T mask, int indexsize) {
+ Chrpos_T *positions_space, *p;
int totalcounts;
- Chrpos_T *p;
int i;
- counts[POLY_A & mask] = 0;
- counts[POLY_C & mask] = 0;
- counts[POLY_G & mask] = 0;
- counts[POLY_T & mask] = 0;
+#if 0
+ /* Causes problems with new algorithm */
+ inquery[POLY_A & mask] = false;
+ inquery[POLY_C & mask] = false;
+ inquery[POLY_G & mask] = false;
+ inquery[POLY_T & mask] = false;
+#endif
for (i = 0; i < oligospace; i++) {
if (inquery[i] == false) {
@@ -17558,20 +26029,19 @@ allocate_positions (Chrpos_T **pointers, Chrpos_T **positions,
if (totalcounts == 0) {
- positions[0] = (Chrpos_T *) NULL;
+ positions_space = (Chrpos_T *) NULL;
} else {
- p = (Chrpos_T *) CALLOC(totalcounts,sizeof(Chrpos_T));
- /* First iteration sets positions[0] so we can free the memory */
+ p = positions_space = (Chrpos_T *) CALLOC(totalcounts,sizeof(Chrpos_T));
+
for (i = 0; i < oligospace; i++) {
positions[i] = p;
p += counts[i];
}
- positions[i] = p; /* For positions[oligospace], used for indicating if pointer hits next position */
- /* Does not copy positions[oligospace] */
- memcpy((void *) pointers,positions,oligospace*sizeof(Chrpos_T *));
+ memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(Chrpos_T *));
+ pointers[oligospace-1] = p;
}
- return totalcounts;
+ return positions_space;
}
#endif
@@ -17579,7 +26049,7 @@ allocate_positions (Chrpos_T **pointers, Chrpos_T **positions,
#ifdef DEBUG14
static void
-counts_compare (Count_T *counts1, Count_T *counts2, Oligospace_T oligospace) {
+counts_compare (Count_T *counts1, Count0_T *counts2, Oligospace_T oligospace) {
Oligospace_T i;
for (i = 0; i < oligospace; i++) {
@@ -17592,20 +26062,49 @@ counts_compare (Count_T *counts1, Count_T *counts2, Oligospace_T oligospace) {
}
static void
-positions_compare (Chrpos_T **positions1, Chrpos_T **positions2, Count_T *counts, int oligospace) {
+positions_compare (Chrpos_T **positions1, Count_T *counts1, Count_T *inquery1,
+ Chrpos_T **positions2, Count0_T *counts2, Oligospace_T oligospace,
+ int indexsize, Shortoligomer_T mask) {
Oligospace_T i;
- int hit;
+ Count_T hit;
+ char *nt;
+ /* printf("Start of positions_compare\n"); */
for (i = 0; i < oligospace; i++) {
/* nt = shortoligo_nt(i,indexsize); */
- for (hit = 0; hit < counts[i]; hit++) {
- if (positions1[i][hit] != positions2[i][hit]) {
- printf("At oligo %llu, hit %d, positions1 %u != positions2 %u\n",
- (unsigned long long) i,hit,positions1[i][hit],positions2[i][hit]);
+ if (inquery1[i] == INQUERY_FALSE) {
+ /* Skip */
+ } else if (counts1[i] != (Count_T) counts2[i]) {
+ /* Can happen if count > 127 */
+ if (i == (POLY_A & mask) || i == (POLY_C & mask) || i == (POLY_G & mask) || i == (POLY_T & mask)) {
+ /* Ignore */
+ } else {
+ nt = shortoligo_nt(i,indexsize);
+ printf("At oligo %s (%llu), counts1 %d != counts2 %d, inquery1 %hd\n",
+ nt,i,counts1[i],counts2[i],inquery1[i]);
+ FREE(nt);
abort();
}
+ } else {
+ for (hit = 0; hit < counts1[i]; hit++) {
+ if (positions1[i][hit] != positions2[i][hit]) {
+ nt = shortoligo_nt(i,indexsize);
+ printf("At oligo %s (%llu), hit %d/%d, positions1 %u != positions2 %u\n",
+ nt,(unsigned long long) i,hit,counts1[i],positions1[i][hit],positions2[i][hit]);
+ FREE(nt);
+ abort();
+ }
+ }
+
+#if 0
+ nt = shortoligo_nt(i,indexsize);
+ printf("At oligo %s, %d positions are equal\n",nt,counts1[i]);
+ FREE(nt);
+#endif
+
}
}
+ /* printf("End of positions_compare\n"); */
return;
}
@@ -17635,7 +26134,6 @@ edge_detect (int *edge, int *sumx, int *sumxx, int length) {
sumx_pseudo = NPSEUDO * theta;
min_rss_sep = sumxx_right - sumx_right*theta;
debug1(printf("theta: %d/%d = %f\n",sumx_right,length,theta));
- debug1(printf("rss: %f\n",rss));
debug1(printf("%s %s %s %s %s %s %s %s %s %s %s\n",
"pos","x","sumx.left","n.left","sumx.right","n.right",
@@ -17655,6 +26153,7 @@ edge_detect (int *edge, int *sumx, int *sumxx, int length) {
rss_right = sumxx_right - sumx_right*theta_right;
rss_sep = rss_left + rss_right;
+#if 0
debug1(
if (rss_sep > 0.0) {
fscore = ((double) (length - 2))*(rss - rss_sep)/rss_sep;
@@ -17666,6 +26165,7 @@ edge_detect (int *edge, int *sumx, int *sumxx, int length) {
pos,sumx[pos]-sumx[pos-1],sumx_left,n_left,sumx_right,n_right,
theta_left,theta_right,rss_left,rss_right);
});
+#endif
/* fscore = (n-2)*(rss - rss_sep)/rss_sep = (n-2)*(rss/rss_sep -
1) is maximized when rss_sep is minimized */
@@ -17736,6 +26236,7 @@ trim_start_detect (int start, int end, int *sumx, int *sumxx) {
rss_right = sumxx_right - sumx_right*theta_right;
rss_sep = rss_left + rss_right;
+#if 0
debug1(
if (rss_sep > 0.0) {
fscore = ((double) (end - start - 2))*(rss - rss_sep)/rss_sep;
@@ -17747,6 +26248,7 @@ trim_start_detect (int start, int end, int *sumx, int *sumxx) {
pos,sumx_left,n_left,sumx_right,n_right,
theta_left,theta_right,rss_left,rss_right);
});
+#endif
/* fscore = (n-2)*(rss - rss_sep)/rss_sep = (n-2)*(rss/rss_sep -
1) is maximized when rss_sep is minimized */
@@ -17811,6 +26313,7 @@ trim_end_detect (int start, int end, int *sumx, int *sumxx) {
rss_right = sumxx_right - sumx_right*theta_right;
rss_sep = rss_left + rss_right;
+#if 0
debug1(
if (rss_sep == 0) {
printf("%d %d %d %d %d %f %f %f %f NA\n",
@@ -17822,6 +26325,7 @@ trim_end_detect (int start, int end, int *sumx, int *sumxx) {
pos,sumx_left,n_left,sumx_right,n_right,
theta_left,theta_right,rss_left,rss_right,fscore);
});
+#endif
/* fscore = (n-2)*(rss - rss_sep)/rss_sep = (n-2)*(rss/rss_sep -
1) is maximized when rss_sep is minimized */
@@ -17853,7 +26357,8 @@ trim_end_detect (int start, int end, int *sumx, int *sumxx) {
double
Oligoindex_set_inquery (int *badoligos, int *repoligos, int *trimoligos, int *trim_start, int *trim_end,
- T this, char *queryuc_ptr, int querylength, bool trimp) {
+ T this, char *queryuc_ptr, int querystart, int queryend, bool trimp) {
+ int querylength;
double oligodepth;
int ngoodoligos, nrepoligos, x, *sumx, *sumxx, sumx0 = 0, sumxx0 = 0;
int edge, side;
@@ -17873,20 +26378,23 @@ Oligoindex_set_inquery (int *badoligos, int *repoligos, int *trimoligos, int *tr
char *nt;
#endif
- if (this->query_evaluated_p == true) {
- return 1.0;
- } else {
- this->query_evaluated_p = true; /* Set this flag so we don't redo this part */
- }
+ querylength = queryend - querystart;
if (querylength <= indexsize) {
*badoligos = 0;
*trim_start = 0;
*trim_end = querylength;
return 1.0;
+
+ } else {
+#ifdef HAVE_SSE2
+ memset(this->inquery,/*INQUERY_FALSE*/0,this->oligospace * sizeof(Count_T));
+#else
+ memset(this->inquery,/*false*/0,this->oligospace * sizeof(bool));
+#endif
}
-
- for (i = 0, p = queryuc_ptr; i < querylength; i++, p++) {
+
+ for (i = querystart, p = &(queryuc_ptr[querystart]); i < queryend; i++, p++) {
in_counter++;
switch (*p) {
@@ -17904,11 +26412,11 @@ Oligoindex_set_inquery (int *badoligos, int *repoligos, int *trimoligos, int *tr
printf("At querypos %d, oligo %s seen\n",i,nt);
FREE(nt));
- this->counts[masked] += 1;
+ this->counts[masked] += 1; /* For determination of trimming */
#ifdef HAVE_SSE2
- if (this->inquery[masked] == /*false*/0x00) {
+ if (this->inquery[masked] == INQUERY_FALSE) {
nunique += 1;
- this->inquery[masked] = /*true*/0xFF;
+ this->inquery[masked] = INQUERY_TRUE;
}
#else
if (this->inquery[masked] == false) {
@@ -17925,7 +26433,12 @@ Oligoindex_set_inquery (int *badoligos, int *repoligos, int *trimoligos, int *tr
*trim_start = 0;
*trim_end = querylength;
return 1.0;
+
} else {
+ /* Not designed to handle trimming on a subset of the query */
+ assert(querystart == 0);
+ assert(queryend == querylength);
+
/* Determine where to trim using a changepoint analysis */
#ifdef GSNAP
sumx = (int *) CALLOCA(querylength - indexsize + 1,sizeof(int));
@@ -18096,7 +26609,7 @@ allocate_positions (Chrpos_T **pointers, Chrpos_T **positions, bool *overabundan
FREE(nt));
#ifdef HAVE_SSE2
- } else if (inquery[masked] == /*false*/0x00) {
+ } else if (inquery[masked] == INQUERY_FALSE) {
/* Don't bother, because it's not in the query sequence */
debug(nt = shortoligo_nt(masked,indexsize);
printf("At genomicpos %u, oligo %s wasn't seen in querypos\n",sequencepos,nt);
@@ -18220,7 +26733,7 @@ store_positions (Chrpos_T **pointers, bool *overabundant,
/* Don't bother */
#ifdef HAVE_SSE2
- } else if (inquery[masked] == /*false*/0x00) {
+ } else if (inquery[masked] == INQUERY_FALSE) {
/* Don't bother, because it's not in the query sequence */
#else
} else if (inquery[masked] == false) {
@@ -18256,143 +26769,121 @@ store_positions (Chrpos_T **pointers, bool *overabundant,
/* chrpos is sequencepos */
void
Oligoindex_hr_tally (T this, Univcoord_T mappingstart, Univcoord_T mappingend, bool plusp,
- char *queryuc_ptr, int querylength, Chrpos_T chrpos, int genestrand) {
+ char *queryuc_ptr, int querystart, int queryend, Chrpos_T chrpos, int genestrand) {
int badoligos, repoligos, trimoligos, trim_start, trim_end;
#ifdef DEBUG14
- Count_T *counts_std;
- Chrpos_T **pointers_std;
- Chrpos_T **positions_std;
+ Count0_T *counts_old;
+ Chrpos_T **positions_old;
#endif
+ Oligospace_T oligo;
- Oligoindex_set_inquery(&badoligos,&repoligos,&trimoligos,&trim_start,&trim_end,this,
- queryuc_ptr,querylength,/*trimp*/false);
+ /* Sets counts for trimming when trimp is true */
+ Oligoindex_set_inquery(&badoligos,&repoligos,&trimoligos,&trim_start,&trim_end,this,
+ queryuc_ptr,querystart,queryend,/*trimp*/false);
memset((void *) this->counts,0,this->oligospace*sizeof(Count_T));
-#if 0
- memset((void *) this->overabundant,false,this->oligospace*sizeof(bool));
- /* Test for thread safety */
- for (i = 0; i < this->oligospace; i++) {
- if (this->counts[i] != 0) {
- abort();
- }
- }
- for (i = 0; i < this->oligospace; i++) {
- if (this->overabundant[i] != false) {
- abort();
- }
- }
-
- /* These values will prevent oligoindex from getting mappings later */
- this->overabundant[POLY_A & this->mask] = true;
- this->overabundant[POLY_C & this->mask] = true;
- this->overabundant[POLY_G & this->mask] = true;
- this->overabundant[POLY_T & this->mask] = true;
-#endif
debug0(printf("called with mapping %u..%u\n",mappingstart,mappingend));
if (plusp == true) {
- debug0(printf("plus, first sequencepos is %u\n",chrpos));
+ debug0(printf("plus, origin is %u\n",chrpos));
#ifdef USE_SIMD_FOR_COUNTS
count_positions_fwd_simd(this->counts,this->indexsize,mappingstart,mappingend,genestrand);
-#ifdef DEBUG14
- counts_std = (Count_T *) CALLOC(this->oligospace,sizeof(Count_T));
- count_positions_fwd_std(counts_std,this->indexsize,mappingstart,mappingend,genestrand);
- counts_compare(this->counts,counts_std,this->oligospace);
-#endif
#else
count_positions_fwd_std(this->counts,this->indexsize,mappingstart,mappingend,genestrand);
#endif
+
+ if ((this->positions_space = allocate_positions(this->pointers,this->positions,this->inquery,this->counts,
+ this->oligospace,this->mask,this->indexsize)) != NULL) {
- if (allocate_positions(this->pointers,this->positions,this->inquery,this->counts,
- this->oligospace,this->mask) > 0) {
- /* Shift positions array by 1 so we can use positions[masked] instead of positions[masked+1] */
#ifdef USE_SIMD_FOR_COUNTS
- store_positions_fwd_simd(this->pointers,&(this->positions[1]),this->counts,this->indexsize,mappingstart,mappingend,
+ store_positions_fwd_simd(this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend,
chrpos,genestrand);
-#ifdef DEBUG14
- pointers_std = (Chrpos_T **) CALLOC(this->oligospace,sizeof(Chrpos_T *));
- positions_std = (Chrpos_T **) CALLOC(this->oligospace+1,sizeof(Chrpos_T *));
- allocate_positions(pointers_std,positions_std,this->inquery,counts_std,
- this->oligospace,this->mask);
- store_positions_fwd_std(pointers_std,&(positions_std[1]),counts_std,this->indexsize,mappingstart,mappingend,
- chrpos,genestrand);
- positions_compare(this->positions,positions_std,counts_std,this->oligospace);
- FREE(positions_std);
- FREE(pointers_std);
-#endif
-
#else
- store_positions_fwd_std(this->pointers,&(this->positions[1]),this->counts,this->indexsize,mappingstart,mappingend,
+ store_positions_fwd_std(this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend,
chrpos,genestrand);
#endif
- }
+ debug9(printf("plus, origin is %u\n",chrpos));
+ debug9(dump_positions(this->positions,this->counts,this->inquery,this->oligospace,this->indexsize));
+
#ifdef DEBUG14
- FREE(counts_std);
+ positions_old = Oligoindex_old_tally(&counts_old,mappingstart,mappingend,plusp,
+ queryuc_ptr,querylength,chrpos,genestrand,
+ this->oligospace,this->indexsize,this->mask);
+ positions_compare(this->positions,this->counts,this->inquery,
+ positions_old,counts_old,this->oligospace,this->indexsize,this->mask);
+ FREE(counts_old);
+ FREE(positions_old[0]);
+ FREE(positions_old);
#endif
+ }
} else {
- debug0(printf("minus, first sequencepos is %u\n",chrpos));
+ debug0(printf("minus, origin is %u\n",chrpos));
#ifdef USE_SIMD_FOR_COUNTS
count_positions_rev_simd(this->counts,this->indexsize,mappingstart,mappingend,genestrand);
-#ifdef DEBUG14
- counts_std = (Count_T *) CALLOC(this->oligospace,sizeof(Count_T));
- count_positions_rev_std(counts_std,this->indexsize,mappingstart,mappingend,genestrand);
- counts_compare(this->counts,counts_std,this->oligospace);
-#endif
#else
count_positions_rev_std(this->counts,this->indexsize,mappingstart,mappingend,genestrand);
#endif
-
- if (allocate_positions(this->pointers,this->positions,this->inquery,this->counts,
- this->oligospace,this->mask) > 0) {
- /* Shift positions array by 1 so we can use positions[masked] instead of positions[masked+1] */
+
+ if ((this->positions_space = allocate_positions(this->pointers,this->positions,this->inquery,this->counts,
+ this->oligospace,this->mask,this->indexsize)) != NULL) {
#ifdef USE_SIMD_FOR_COUNTS
- store_positions_rev_simd(this->pointers,&(this->positions[1]),this->counts,this->indexsize,mappingstart,mappingend,
+ store_positions_rev_simd(this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend,
chrpos,genestrand);
-#ifdef DEBUG14
- pointers_std = (Chrpos_T **) CALLOC(this->oligospace,sizeof(Chrpos_T *));
- positions_std = (Chrpos_T **) CALLOC(this->oligospace+1,sizeof(Chrpos_T *));
- allocate_positions(pointers_std,positions_std,this->inquery,counts_std,
- this->oligospace,this->mask);
- store_positions_rev_std(pointers_std,&(positions_std[1]),counts_std,this->indexsize,mappingstart,mappingend,
- chrpos,genestrand);
- positions_compare(this->positions,positions_std,counts_std,this->oligospace);
- FREE(positions_std);
- FREE(pointers_std);
-#endif
-
#else
- store_positions_rev_std(this->pointers,&(this->positions[1]),this->counts,this->indexsize,mappingstart,mappingend,
+ store_positions_rev_std(this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend,
chrpos,genestrand);
#endif
- }
+ debug9(printf("minus, origin is %u\n",chrpos));
+ debug9(dump_positions(this->positions,this->counts,this->inquery,this->oligospace,this->indexsize));
+
#ifdef DEBUG14
- FREE(counts_std);
+ positions_old = Oligoindex_old_tally(&counts_old,mappingstart,mappingend,plusp,
+ queryuc_ptr,querylength,chrpos,genestrand,
+ this->oligospace,this->indexsize,this->mask);
+ positions_compare(this->positions,this->counts,this->inquery,
+ positions_old,counts_old,this->oligospace,this->indexsize,this->mask);
+ FREE(counts_old);
+ FREE(positions_old[0]);
+ FREE(positions_old);
#endif
+ }
}
- debug9(dump_positions(this->positions,this->counts,this->oligospace,this->indexsize));
+#if 0
+ /* counts already modified by allocate_positions */
+ /* Speed up diagonal and stage 2 algorithms */
+ for (oligo = 0; oligo < this->oligospace; oligo++) {
+ if (this->counts[oligo] > EXCESSIVE_COUNTS) {
+ this->counts[oligo] = 0;
+ }
+ }
+#endif
return;
}
+
void
-Oligoindex_clear_inquery (T this, char *queryuc_ptr, int querylength) {
+Oligoindex_clear_inquery (T this, char *queryuc_ptr, int querystart, int queryend) {
int in_counter = 0, i;
char *p;
Shortoligomer_T oligo = 0U;
Shortoligomer_T masked;
int indexsize = this->indexsize;
+#ifdef DEBUG
+ char *nt;
+#endif
- for (i = 0, p = queryuc_ptr; i < querylength; i++, p++) {
+ for (i = querystart, p = &(queryuc_ptr[querystart]); i < queryend; i++, p++) {
in_counter++;
switch (*p) {
@@ -18405,13 +26896,15 @@ Oligoindex_clear_inquery (T this, char *queryuc_ptr, int querylength) {
if (in_counter == indexsize) {
masked = oligo & this->mask;
- debug(nt = shortoligo_nt(oligo,indexsize);
- printf("At querypos %d, oligo %s seen\n",i,nt);
- FREE(nt));
+#ifdef DEBUG
+ nt = shortoligo_nt(oligo,indexsize);
+ printf("At querypos %d, oligo %s seen\n",i,nt);
+ FREE(nt);
+#endif
this->counts[masked] = 0;
#ifdef HAVE_SSE2
- this->inquery[masked] = /*false*/0x00;
+ this->inquery[masked] = INQUERY_FALSE;
#else
this->inquery[masked] = false;
#endif
@@ -18419,7 +26912,7 @@ Oligoindex_clear_inquery (T this, char *queryuc_ptr, int querylength) {
}
}
- this->query_evaluated_p = false;
+ /* this->query_evaluated_p = false; */
return;
}
@@ -18427,14 +26920,16 @@ Oligoindex_clear_inquery (T this, char *queryuc_ptr, int querylength) {
void
Oligoindex_untally (T this, char *queryuc_ptr, int querylength) {
+#if 0
+
if (this->query_evaluated_p == true) {
#ifdef GSNAP
Oligoindex_clear_inquery(this,queryuc_ptr,querylength);
#else
- if (querylength > this->oligospace) {
+ if ((Oligospace_T) querylength > this->oligospace) {
/* For very long sequences, it may be better to just clear all oligos directly */
#ifdef HAVE_SSE2
- memset((void *) this->inquery,/*false*/0x00,this->oligospace*sizeof(Count_T));
+ memset((void *) this->inquery,INQUERY_FALSE,this->oligospace*sizeof(Count_T));
#else
memset((void *) this->inquery,false,this->oligospace*sizeof(bool));
#endif
@@ -18446,11 +26941,12 @@ Oligoindex_untally (T this, char *queryuc_ptr, int querylength) {
#endif
/* This statement is critical to avoid interactions between queryseqs */
- this->query_evaluated_p = false;
+ /* this->query_evaluated_p = false; */
}
+#endif
- if (this->positions[0] != NULL) {
- FREE(this->positions[0]);
+ if (this->positions_space != NULL) {
+ FREE(this->positions_space);
}
return;
@@ -18461,7 +26957,7 @@ Oligoindex_untally (T this, char *queryuc_ptr, int querylength) {
static void
Oligoindex_free (T *old) {
if (*old) {
- FREE((*old)->pointers);
+ FREE((*old)->pointers_allocated);
FREE((*old)->positions);
#ifdef HAVE_SSE2
_mm_free((*old)->counts_allocated);
@@ -18500,8 +26996,8 @@ lookup (int *nhits, T this, Shortoligomer_T masked) {
if ((*nhits = this->counts[masked]) >= 1) {
debug(nt = shortoligo_nt(masked,this->indexsize);
- printf("masked %s => %d entries: %u...%u\n",
- nt,*nhits,this->positions[masked][0],this->positions[masked][*nhits-1]);
+ printf("masked is %s (%u) => %d entries: %u...%u\n",
+ nt,masked,*nhits,this->positions[masked][0],this->positions[masked][*nhits-1]);
FREE(nt));
return this->positions[masked];
} else {
@@ -18547,8 +27043,8 @@ consecutivep (int prev_querypos, unsigned int *prev_mappings, int prev_nhits,
List_T
Oligoindex_get_mappings (List_T diagonals, bool *coveredp, Chrpos_T **mappings, int *npositions,
int *totalpositions, bool *oned_matrix_p, int *maxnconsecutive,
- Oligoindex_array_T array, T this, char *queryuc_ptr, int querylength,
- Chrpos_T chrstart, Chrpos_T chrend,
+ Oligoindex_array_T array, T this, char *queryuc_ptr,
+ int querystart, int queryend, int querylength, Chrpos_T chrstart, Chrpos_T chrend,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
Diagpool_T diagpool) {
int nhits, hit, diagi_adjustment, i;
@@ -18575,6 +27071,8 @@ Oligoindex_get_mappings (List_T diagonals, bool *coveredp, Chrpos_T **mappings,
int indexsize = this->indexsize;
+ debug3(printf("Starting Oligoindex_get_mappings\n"));
+
diag_lookback = this->diag_lookback;
suffnconsecutive = this->suffnconsecutive;
genomiclength = chrend - chrstart;
@@ -18617,9 +27115,9 @@ Oligoindex_get_mappings (List_T diagonals, bool *coveredp, Chrpos_T **mappings,
#endif
- querypos = -indexsize;
+ querypos = querystart - indexsize;
*oned_matrix_p = true;
- for (i = 0, p = queryuc_ptr; i < querylength; i++, p++) {
+ for (i = querystart, p = &(queryuc_ptr[querystart]); i < queryend; i++, p++) {
in_counter++;
querypos++;
@@ -18644,6 +27142,12 @@ Oligoindex_get_mappings (List_T diagonals, bool *coveredp, Chrpos_T **mappings,
debug3(printf("querypos %d, masked %u, nhits %d\n",querypos,masked,nhits));
if (nhits <= 0) {
cum_nohits[querypos] += 1;
+#if 0
+ } else if (nhits > EXCESSIVE_COUNTS) {
+ /* Already covered by setting counts > EXCESSIVE_COUNTS to be 0 */
+ /* Skip, because otherwise too slow */
+ cum_nohits[querypos] += 1;
+#endif
} else {
*totalpositions += nhits;
if (*totalpositions < 0) {
@@ -18673,7 +27177,7 @@ Oligoindex_get_mappings (List_T diagonals, bool *coveredp, Chrpos_T **mappings,
}
/* Must use >= here, so querypos 0 - (-diag_lookback) will fail */
- if (ptr->querypos < 0) {
+ if (ptr->querypos < querystart) {
debug3(printf("At diagi %d (checking querypos %d to %d), no consecutive\n",diagi,ptr->querypos,querypos));
ptr->nconsecutive = 0;
ptr->consecutive_start = querypos;
@@ -18747,7 +27251,10 @@ Oligoindex_get_mappings (List_T diagonals, bool *coveredp, Chrpos_T **mappings,
FREE(genomicdiag_init_p);
}
+ debug3(printf("Ending Oligoindex_get_mappings\n"));
+
return diagonals;
}
+
diff --git a/src/oligoindex_hr.h b/src/oligoindex_hr.h
index b7f22f1..31e1429 100644
--- a/src/oligoindex_hr.h
+++ b/src/oligoindex_hr.h
@@ -1,4 +1,4 @@
-/* $Id: oligoindex_hr.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: oligoindex_hr.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef OLIGOINDEX_HR_INCLUDED
#define OLIGOINDEX_HR_INCLUDED
@@ -15,7 +15,22 @@
#define OVERABUNDANCE_MIN 200
typedef UINT4 Shortoligomer_T;
+
+
+#if 1
typedef unsigned char Count_T;
+#define INQUERY_FALSE 0x00
+#define INQUERY_TRUE 0xFF
+#define SIMD_NELTS 16 /* 16 bytes in 128 bits */
+
+#else
+/* Attempted to use int, so we don't need to check for count > 255. However, SIMD is much faster on bytes than on ints */
+typedef unsigned int Count_T;
+#define INQUERY_FALSE 0x00000000
+#define INQUERY_TRUE 0xFFFFFFFF
+#define SIMD_NELTS 4 /* 4 ints in 128 bits */
+#endif
+
#define T Oligoindex_T
typedef struct T *T;
@@ -41,21 +56,22 @@ Oligoindex_array_new_minor (int max_querylength, int max_genomiclength);
extern double
Oligoindex_set_inquery (int *badoligos, int *repoligos, int *trimoligos, int *trim_start, int *trim_end,
- T this, char *queryuc_ptr, int querylength, bool trimp);
+ T this, char *queryuc_ptr, int querystart, int queryend, bool trimp);
extern void
Oligoindex_hr_tally (T this, Univcoord_T mappingstart, Univcoord_T mappingend, bool plusp,
- char *queryuc_ptr, int querylength, Chrpos_T chrpos, int genestrand);
+ char *queryuc_ptr, int querystart, int queryend, Chrpos_T chrpos, int genestrand);
extern void
Oligoindex_untally (T this, char *queryuc_ptr, int querylength);
extern void
-Oligoindex_clear_inquery (T this, char *queryuc_ptr, int querylength);
+Oligoindex_clear_inquery (T this, char *queryuc_ptr, int querystart, int queryend);
extern void
Oligoindex_array_free(Oligoindex_array_T *old);
extern List_T
Oligoindex_get_mappings (List_T diagonals, bool *coveredp, Chrpos_T **mappings, int *npositions,
int *totalpositions, bool *oned_matrix_p, int *maxnconsecutive,
- Oligoindex_array_T array, T this, char *queryuc_ptr, int querylength,
+ Oligoindex_array_T array, T this, char *queryuc_ptr,
+ int querystart, int queryend, int querylength,
Chrpos_T chrstart, Chrpos_T chrend,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
Diagpool_T diagpool);
diff --git a/src/outbuffer.c b/src/outbuffer.c
index bc7ffc8..5583a71 100644
--- a/src/outbuffer.c
+++ b/src/outbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: outbuffer.c 160877 2015-03-13 00:31:23Z twu $";
+static char rcsid[] = "$Id: outbuffer.c 162092 2015-03-26 18:30:31Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -19,14 +19,14 @@ static char rcsid[] = "$Id: outbuffer.c 160877 2015-03-13 00:31:23Z twu $";
#include "bool.h"
#include "mem.h"
#include "samheader.h"
-#include "samflags.h" /* For output types */
-#ifdef GSNAP
-#include "shortread.h"
-#include "samprint.h"
-#include "stage3hr.h"
-#endif
+/* MPI processing */
+#ifdef DEBUGM
+#define debugm(x) x
+#else
+#define debugm(x)
+#endif
#ifdef DEBUG
#define debug(x) x
@@ -41,11 +41,350 @@ static char rcsid[] = "$Id: outbuffer.c 160877 2015-03-13 00:31:23Z twu $";
#endif
+/* sam-to-bam conversions always need the headers */
+#define SAM_HEADERS_ON_EMPTY_FILES 1
+
+static int argc;
+static char **argv;
+static int optind_save;
+
+static Univ_IIT_T chromosome_iit;
+static bool any_circular_p;
+static int nworkers;
+static bool orderedp;
+static bool quiet_if_excessive_p;
+
+#ifdef GSNAP
+static bool output_sam_p;
+#else
+static Printtype_T printtype;
+static Sequence_T usersegment;
+#endif
+
+static bool sam_headers_p;
+static char *sam_read_group_id;
+static char *sam_read_group_name;
+static char *sam_read_group_library;
+static char *sam_read_group_platform;
+
+static bool appendp;
+static char *output_file;
+static char *split_output_root;
+static char *failedinput_root;
+
+#ifdef USE_MPI
+static MPI_File *outputs;
+#ifdef GSNAP
+static MPI_File output_failedinput_1;
+static MPI_File output_failedinput_2;
+#else
+static MPI_File output_failedinput;
+#endif
+
+
+#else
+static char *write_mode;
+static FILE **outputs = NULL;
+#ifdef GSNAP
+static FILE *output_failedinput_1;
+static FILE *output_failedinput_2;
+#else
+static FILE *output_failedinput;
+#endif
+
+#endif
+
+
+
+/* Taken from Univ_IIT_dump_sam */
+static void
+dump_sam_usersegment (FILE *fp, Sequence_T usersegment,
+ char *sam_read_group_id, char *sam_read_group_name,
+ char *sam_read_group_library, char *sam_read_group_platform) {
+
+ fprintf(fp,"@SQ\tSN:%s",Sequence_accession(usersegment));
+ fprintf(fp,"\tLN:%u",Sequence_fulllength(usersegment));
+ fprintf(fp,"\n");
+
+ if (sam_read_group_id != NULL) {
+ fprintf(fp,"@RG\tID:%s",sam_read_group_id);
+ if (sam_read_group_platform != NULL) {
+ fprintf(fp,"\tPL:%s",sam_read_group_platform);
+ }
+ if (sam_read_group_library != NULL) {
+ fprintf(fp,"\tLB:%s",sam_read_group_library);
+ }
+ fprintf(fp,"\tSM:%s",sam_read_group_name);
+ fprintf(fp,"\n");
+ }
+
+ return;
+}
+
+#ifndef GSNAP
+static void
+print_gff_header (FILE *fp, int argc, char **argv, int optind) {
+ char **argstart;
+ int c;
+
+ fprintf(fp,"##gff-version 3\n");
+ fprintf(fp,"# Generated by GMAP version %s using call: ",PACKAGE_VERSION);
+ argstart = &(argv[-optind]);
+ for (c = 0; c < argc + optind; c++) {
+ fprintf(fp," %s",argstart[c]);
+ }
+ fprintf(fp,"\n");
+ return;
+}
+#endif
+
+
+static void
+print_file_headers (
+#ifdef USE_MPI
+ MPI_File output
+#else
+ FILE *output
+#endif
+ ) {
+#ifdef GSNAP
+ if (output_sam_p == true && sam_headers_p == true) {
+ SAM_header_print_HD(output,nworkers,orderedp);
+ SAM_header_print_PG(output,argc,argv,optind_save);
+ Univ_IIT_dump_sam(output,chromosome_iit,sam_read_group_id,sam_read_group_name,
+ sam_read_group_library,sam_read_group_platform);
+ }
+
+#else
+ if (printtype == GFF3_GENE || printtype == GFF3_MATCH_CDNA || printtype == GFF3_MATCH_EST) {
+ print_gff_header(output,argc,argv,optind_save);
+
+#ifndef PMAP
+ } else if (printtype == SAM && sam_headers_p == true) {
+ if (usersegment != NULL) {
+ dump_sam_usersegment(output,usersegment,sam_read_group_id,sam_read_group_name,
+ sam_read_group_library,sam_read_group_platform);
+ } else {
+ SAM_header_print_HD(output,nworkers,orderedp);
+ SAM_header_print_PG(output,argc,argv,optind_save);
+ Univ_IIT_dump_sam(output,chromosome_iit,sam_read_group_id,sam_read_group_name,
+ sam_read_group_library,sam_read_group_platform);
+ }
+#endif
+
+ }
+#endif
+
+ return;
+}
+
+
+static void
+failedinput_open (char *failedinput_root) {
+ char *filename;
+
+#ifdef GSNAP
+ filename = (char *) MALLOC((strlen(failedinput_root)+strlen(".1")+1) * sizeof(char));
+ sprintf(filename,"%s.1",failedinput_root);
+
+#ifdef USE_MPI
+ if (appendp == true) {
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_APPEND,
+ MPI_INFO_NULL,&output_failedinput_1);
+ } else {
+ /* Need to remove existing file, if any */
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE,
+ MPI_INFO_NULL,&output_failedinput_1);
+ MPI_File_close(&output_failedinput_1);
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY,
+ MPI_INFO_NULL,&output_failedinput_1);
+ }
+#else
+ if ((output_failedinput_1 = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+#endif
+
+ /* Re-use filename, since it is the same length */
+ sprintf(filename,"%s.2",failedinput_root);
+#ifdef USE_MPI
+ if (appendp == true) {
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_APPEND,
+ MPI_INFO_NULL,&output_failedinput_2);
+ } else {
+ /* Need to remove existing file, if any */
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE,
+ MPI_INFO_NULL,&output_failedinput_2);
+ MPI_File_close(&output_failedinput_2);
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY,
+ MPI_INFO_NULL,&output_failedinput_2);
+ }
+#else
+ if ((output_failedinput_2 = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+#endif
+ FREE(filename);
+
+#else /* GMAP */
+ filename = (char *) MALLOC((strlen(failedinput_root)+1) * sizeof(char));
+ sprintf(filename,"%s",failedinput_root);
+#ifdef USE_MPI
+ if (appendp == true) {
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_APPEND,
+ MPI_INFO_NULL,&output_failedinput);
+ } else {
+ /* Need to remove existing file, if any */
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE,
+ MPI_INFO_NULL,&output_failedinput);
+ MPI_File_close(&output_failedinput);
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY,
+ MPI_INFO_NULL,&output_failedinput);
+ }
+#else
+ if ((output_failedinput = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ }
+#endif
+ FREE(filename);
+
+#endif /* GSNAP */
+
+ return;
+}
+
+
+void
+Outbuffer_setup (int argc_in, char **argv_in, int optind_in,
+ Univ_IIT_T chromosome_iit_in, bool any_circular_p_in,
+ int nworkers_in, bool orderedp_in, bool quiet_if_excessive_p_in,
+#ifdef GSNAP
+ bool output_sam_p_in,
+#else
+ Printtype_T printtype_in, Sequence_T usersegment_in,
+#endif
+ bool sam_headers_p_in, char *sam_read_group_id_in, char *sam_read_group_name_in,
+ char *sam_read_group_library_in, char *sam_read_group_platform_in,
+ bool appendp_in, char *output_file_in, char *split_output_root_in, char *failedinput_root_in) {
+#ifdef USE_MPI
+ SAM_split_output_type split_output;
+#endif
+
+
+ argc = argc_in;
+ argv = argv_in;
+ optind_save = optind_in;
+
+ chromosome_iit = chromosome_iit_in;
+ any_circular_p = any_circular_p_in;
+
+ nworkers = nworkers_in;
+ orderedp = orderedp_in;
+ quiet_if_excessive_p = quiet_if_excessive_p_in;
+
+#ifdef GSNAP
+ output_sam_p = output_sam_p_in;
+#else
+ printtype = printtype_in;
+ usersegment = usersegment_in;
+#endif
+
+ sam_headers_p = sam_headers_p_in;
+ sam_read_group_id = sam_read_group_id_in;
+ sam_read_group_name = sam_read_group_name_in;
+ sam_read_group_library = sam_read_group_library_in;
+ sam_read_group_platform = sam_read_group_platform_in;
+
+ appendp = appendp_in;
+ split_output_root = split_output_root_in;
+ output_file = output_file_in;
+
+
+ /************************************************************************/
+ /* Output files */
+ /************************************************************************/
+
+#ifdef USE_MPI
+ /* All processes need to run MPI_File_open, and need to open all files now */
+ outputs = (MPI_File *) CALLOC_KEEP(1+N_SPLIT_OUTPUTS,sizeof(MPI_File));
+ if (split_output_root != NULL) {
+ for (split_output = 1; split_output <= N_SPLIT_OUTPUTS; split_output++) {
+ outputs[split_output] = SAM_header_open_file(split_output,split_output_root,appendp);
+#ifdef SAM_HEADERS_ON_EMPTY_FILES
+ print_file_headers(outputs[split_output]);
+#endif
+ }
+
+ } else if (output_file != NULL) {
+ outputs[0] = SAM_header_open_file(OUTPUT_NONE,/*split_output_root*/output_file,appendp);
+#ifdef SAM_HEADERS_ON_EMPTY_FILES
+ print_file_headers(outputs[0]);
+#endif
+ for (split_output = 1; split_output <= N_SPLIT_OUTPUTS; split_output++) {
+ outputs[split_output] = outputs[0];
+ }
+
+ } else {
+ /* Write to stdout */
+ outputs[0] = (MPI_File) NULL;
+#ifdef SAM_HEADERS_ON_EMPTY_FILES
+ print_file_headers(outputs[0]);
+#endif
+ for (split_output = 1; split_output <= N_SPLIT_OUTPUTS; split_output++) {
+ outputs[split_output] = (MPI_File) NULL;
+ }
+ }
+
+#else
+ /* Only the output thread needs to run fopen, and can open files when needed */
+ if (appendp == true) {
+ write_mode = "a";
+ } else {
+ write_mode = "w";
+ }
+ outputs = (FILE **) CALLOC_KEEP(1+N_SPLIT_OUTPUTS,sizeof(FILE *));
+#endif
+
+
+ /************************************************************************/
+ /* Failed input files */
+ /************************************************************************/
+
+ failedinput_root = failedinput_root_in;
+ if (failedinput_root == NULL) {
+#ifdef GSNAP
+ output_failedinput_1 = output_failedinput_2 = NULL;
+#else
+ output_failedinput = NULL;
+#endif
+ } else {
+ failedinput_open(failedinput_root);
+ }
+
+ return;
+}
+
+
+void
+Outbuffer_cleanup () {
+ FREE(outputs);
+ return;
+}
+
+
typedef struct RRlist_T *RRlist_T;
struct RRlist_T {
int id;
- Result_T result;
- Request_T request;
+ Filestring_T fp;
+#ifdef GSNAP
+ Filestring_T fp_failedinput_1;
+ Filestring_T fp_failedinput_2;
+#else
+ Filestring_T fp_failedinput;
+#endif
RRlist_T next;
};
@@ -56,7 +395,7 @@ RRlist_dump (RRlist_T head, RRlist_T tail) {
RRlist_T this;
printf("head %p\n",head);
- for (this = head; this != NULL; this = head->next) {
+ for (this = head; this != NULL; this = this->next) {
printf("%p: next %p\n",this,this->next);
}
printf("tail %p\n",tail);
@@ -68,12 +407,23 @@ RRlist_dump (RRlist_T head, RRlist_T tail) {
/* Returns new tail */
static RRlist_T
-RRlist_push (RRlist_T *head, RRlist_T tail, Request_T request, Result_T result) {
+RRlist_push (RRlist_T *head, RRlist_T tail, Filestring_T fp,
+#ifdef GSNAP
+ Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2
+#else
+ Filestring_T fp_failedinput
+#endif
+ ) {
RRlist_T new;
new = (RRlist_T) MALLOC_OUT(sizeof(*new)); /* Called by worker thread */
- new->request = request;
- new->result = result;
+ new->fp = fp;
+#ifdef GSNAP
+ new->fp_failedinput_1 = fp_failedinput_1;
+ new->fp_failedinput_2 = fp_failedinput_2;
+#else
+ new->fp_failedinput = fp_failedinput;
+#endif
new->next = (RRlist_T) NULL;
if (*head == NULL) { /* Equivalent to tail == NULL, but using *head avoids having to set tail in RRlist_pop */
@@ -88,11 +438,22 @@ RRlist_push (RRlist_T *head, RRlist_T tail, Request_T request, Result_T result)
/* Returns new head */
static RRlist_T
-RRlist_pop (RRlist_T head, Request_T *request, Result_T *result) {
+RRlist_pop (RRlist_T head, Filestring_T *fp,
+#ifdef GSNAP
+ Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2
+#else
+ Filestring_T *fp_failedinput
+#endif
+ ) {
RRlist_T newhead;
- *request = head->request;
- *result = head->result;
+ *fp = head->fp;
+#ifdef GSNAP
+ *fp_failedinput_1 = head->fp_failedinput_1;
+ *fp_failedinput_2 = head->fp_failedinput_2;
+#else
+ *fp_failedinput = head->fp_failedinput;
+#endif
newhead = head->next;
@@ -102,7 +463,13 @@ RRlist_pop (RRlist_T head, Request_T *request, Result_T *result) {
static RRlist_T
-RRlist_insert (RRlist_T list, int id, Request_T request, Result_T result) {
+RRlist_insert (RRlist_T list, int id, Filestring_T fp,
+#ifdef GSNAP
+ Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2
+#else
+ Filestring_T fp_failedinput
+#endif
+ ) {
RRlist_T *p;
RRlist_T new;
@@ -113,8 +480,13 @@ RRlist_insert (RRlist_T list, int id, Request_T request, Result_T result) {
new = (RRlist_T) MALLOC_OUT(sizeof(*new));
new->id = id;
- new->request = request;
- new->result = result;
+ new->fp = fp;
+#ifdef GSNAP
+ new->fp_failedinput_1 = fp_failedinput_1;
+ new->fp_failedinput_2 = fp_failedinput_2;
+#else
+ new->fp_failedinput = fp_failedinput;
+#endif
new->next = *p;
*p = new;
@@ -123,12 +495,23 @@ RRlist_insert (RRlist_T list, int id, Request_T request, Result_T result) {
/* Returns new head */
static RRlist_T
-RRlist_pop_id (RRlist_T head, int *id, Request_T *request, Result_T *result) {
+RRlist_pop_id (RRlist_T head, int *id, Filestring_T *fp,
+#ifdef GSNAP
+ Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2
+#else
+ Filestring_T *fp_failedinput
+#endif
+ ) {
RRlist_T newhead;
*id = head->id;
- *request = head->request;
- *result = head->result;
+ *fp = head->fp;
+#ifdef GSNAP
+ *fp_failedinput_1 = head->fp_failedinput_1;
+ *fp_failedinput_2 = head->fp_failedinput_2;
+#else
+ *fp_failedinput = head->fp_failedinput;
+#endif
newhead = head->next;
@@ -137,151 +520,9 @@ RRlist_pop_id (RRlist_T head, int *id, Request_T *request, Result_T *result) {
}
-
-
#define T Outbuffer_T
struct T {
-#ifndef GSNAP
- Genome_T genome;
-#endif
-
- Univ_IIT_T chromosome_iit;
-
- char *sevenway_root;
- char *failedinput_root;
- bool appendp;
-
-#ifdef GSNAP
- bool sam_headers_p;
- char *sam_read_group_id;
- char *sam_read_group_name;
- char *sam_read_group_library;
- char *sam_read_group_platform;
- int quality_shift;
- int nworkers;
- bool orderedp;
- int argc;
- char **argv;
- int optind;
-#elif defined PMAP
-
-#else
- bool sam_headers_p;
- bool sam_paired_p;
- char *sam_read_group_id;
- char *sam_read_group_name;
- char *sam_read_group_library;
- char *sam_read_group_platform;
- int quality_shift;
- int nworkers;
- bool orderedp;
- int argc;
- char **argv;
- int optind;
-#endif
-
- FILE *fp_failedinput_1;
- FILE *fp_failedinput_2;
-
-#ifdef GSNAP
-
- FILE *fp_nomapping; /* NM */
- FILE *fp_halfmapping_uniq; /* HU */
- FILE *fp_halfmapping_circular; /* HC */
- FILE *fp_halfmapping_transloc; /* HT */
- FILE *fp_halfmapping_mult; /* HM */
- FILE *fp_halfmapping_mult_xs_1; /* HX */
- FILE *fp_halfmapping_mult_xs_2; /* HX */
- FILE *fp_unpaired_uniq; /* UU */
- FILE *fp_unpaired_circular; /* UC */
- FILE *fp_unpaired_transloc; /* UT */
- FILE *fp_unpaired_mult; /* UM */
- FILE *fp_unpaired_mult_xs_1; /* UX */
- FILE *fp_unpaired_mult_xs_2; /* UX */
- FILE *fp_paired_uniq_circular; /* PC */
- FILE *fp_paired_uniq_inv; /* PI */
- FILE *fp_paired_uniq_scr; /* PS */
- FILE *fp_paired_uniq_long; /* PL */
- FILE *fp_paired_mult; /* PM */
- FILE *fp_paired_mult_xs_1; /* PX */
- FILE *fp_paired_mult_xs_2; /* PX */
- FILE *fp_concordant_uniq; /* CU */
- FILE *fp_concordant_circular; /* CC */
- FILE *fp_concordant_transloc; /* CT */
- FILE *fp_concordant_mult; /* CM */
- FILE *fp_concordant_mult_xs_1; /* CX */
- FILE *fp_concordant_mult_xs_2; /* CX */
-
- bool timingp;
- bool output_sam_p;
- Gobywriter_T gobywriter;
-
- bool fastq_format_p;
- bool clip_overlap_p; /* clip_overlap_p and merge_overlap_p cannot both be true */
- bool merge_overlap_p;
- bool merge_samechr_p;
- bool print_m8_p;
-
- bool invert_first_p;
- bool invert_second_p;
- Chrpos_T pairmax;
-
-#else
-
- FILE *fp_nomapping; /* NM */
- FILE *fp_uniq; /* UU */
- FILE *fp_circular; /* UC */
- FILE *fp_transloc; /* UT */
- FILE *fp_mult; /* UM */
- FILE *fp_mult_xs; /* UX */
-
- bool chimeras_allowed_p;
-
- char *user_genomicseg;
- Sequence_T usersegment;
-
- char *dbversion;
- char *chrsubset_name;
- Univ_IIT_T contig_iit;
- IIT_T altstrain_iit;
- IIT_T map_iit;
- int *map_divint_crosstable;
-
- Printtype_T printtype;
- bool checksump;
- int chimera_margin;
-
- bool map_exons_p;
- bool map_bothstrands_p;
- bool print_comment_p;
- int nflanking;
-
- int proteinmode;
- int invertmode;
- bool nointronlenp;
-
- int wraplength;
- int ngap;
- int cds_startpos;
-
- bool fulllengthp;
- bool truncatep;
- bool strictp;
- bool diagnosticp;
- bool maponlyp;
-
- bool stage1debug;
- bool diag_debug;
- bool debug_graphic_p;
-
-#endif
-
- int maxpaths_report;
- bool nofailsp;
- bool failsonlyp;
- bool quiet_if_excessive_p;
-
#ifdef HAVE_PTHREAD
pthread_mutex_t lock;
#endif
@@ -289,13 +530,14 @@ struct T {
unsigned int output_buffer_size;
unsigned int nread;
unsigned int ntotal;
+ unsigned int nbeyond; /* MPI request that is beyond the given inputs */
unsigned int nprocessed;
RRlist_T head;
RRlist_T tail;
#ifdef HAVE_PTHREAD
- pthread_cond_t result_avail_p;
+ pthread_cond_t filestring_avail_p;
#endif
};
@@ -304,2217 +546,459 @@ struct T {
* File routines
************************************************************************/
-static void
-failedinput_close (T this) {
- if (this->fp_failedinput_1 != NULL) {
- fclose(this->fp_failedinput_1);
- }
- if (this->fp_failedinput_2 != NULL) {
- fclose(this->fp_failedinput_2);
- }
- return;
-}
+T
+Outbuffer_new (unsigned int output_buffer_size, unsigned int nread) {
+ T new = (T) MALLOC_KEEP(sizeof(*new));
-#ifdef GSNAP
+#ifdef HAVE_PTHREAD
+ pthread_mutex_init(&new->lock,NULL);
+#endif
-/* Always open both .1 and .2 */
-static void
-failedinput_open_paired (T this) {
- char *filename;
- char *write_mode;
+ new->output_buffer_size = output_buffer_size;
+ new->nread = nread;
- if (this->appendp == true) {
- write_mode = "a";
- } else {
- write_mode = "w";
- }
+ /* Set to infinity until all reads are input. Used for Pthreads version */
+ new->ntotal = (unsigned int) -1U;
- if (this->failedinput_root != NULL) {
- filename = (char *) MALLOCA((strlen(this->failedinput_root)+strlen(".1")+1) * sizeof(char));
- sprintf(filename,"%s.1",this->failedinput_root);
- if ((this->fp_failedinput_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
-
- /* Re-use filename, since it is the same length */
- sprintf(filename,"%s.2",this->failedinput_root);
- if ((this->fp_failedinput_2 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREEA(filename);
- }
-
- return;
-}
-
-
-
-static void
-sevenway_open_single (T this) {
- char *filename;
- char *write_mode;
-
- if (this->appendp == true) {
- write_mode = "a";
- } else {
- write_mode = "w";
- }
-
- /* Cannot use alloca easily, since each filename has a different length */
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".nomapping")+1,sizeof(char));
- sprintf(filename,"%s.nomapping",this->sevenway_root);
- if ((this->fp_nomapping = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".unpaired_uniq")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_uniq",this->sevenway_root);
- if ((this->fp_unpaired_uniq = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".unpaired_circular")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_circular",this->sevenway_root);
- if ((this->fp_unpaired_circular = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".unpaired_transloc")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_transloc",this->sevenway_root);
- if ((this->fp_unpaired_transloc = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".unpaired_mult")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_mult",this->sevenway_root);
- if ((this->fp_unpaired_mult = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->quiet_if_excessive_p == false) {
- this->fp_unpaired_mult_xs_1 = (FILE *) NULL;
-
-#if 0
- } else if (this->fails_as_input_p == true) {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".unpaired_mult_xs.1.fq")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_mult_xs.1.fq",this->sevenway_root);
- if ((this->fp_unpaired_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-#endif
-
- } else {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".unpaired_mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_mult_xs",this->sevenway_root);
- if ((this->fp_unpaired_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->output_sam_p == true && this->sam_headers_p == true) {
- SAM_header_print_HD(this->fp_unpaired_mult_xs_1,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_unpaired_mult_xs_1,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_unpaired_mult_xs_1,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- }
- }
-
-
- if (this->output_sam_p == true && this->sam_headers_p == true) {
- SAM_header_print_HD(this->fp_nomapping,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_nomapping,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_nomapping,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_unpaired_uniq,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_unpaired_uniq,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_unpaired_uniq,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_unpaired_circular,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_unpaired_circular,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_unpaired_circular,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_unpaired_transloc,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_unpaired_transloc,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_unpaired_transloc,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_unpaired_mult,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_unpaired_mult,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_unpaired_mult,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- }
-
- if (this->output_sam_p == true) {
- SAM_file_setup_single(this->fp_failedinput_1,this->fp_nomapping,this->fp_unpaired_uniq,this->fp_unpaired_circular,
- this->fp_unpaired_transloc,this->fp_unpaired_mult,this->fp_unpaired_mult_xs_1);
- } else {
- Stage3hr_file_setup_single(this->fp_failedinput_1,this->fp_nomapping,this->fp_unpaired_uniq,this->fp_unpaired_circular,
- this->fp_unpaired_transloc,this->fp_unpaired_mult,this->fp_unpaired_mult_xs_1);
- }
-
- return;
-}
-
-
-static void
-sevenway_open_paired (T this) {
- char *filename;
- char *write_mode;
-
- if (this->appendp == true) {
- write_mode = "a";
- } else {
- write_mode = "w";
- }
-
- if (this->quiet_if_excessive_p == false) {
- this->fp_unpaired_mult_xs_1 = (FILE *) NULL;
- this->fp_unpaired_mult_xs_2 = (FILE *) NULL;
-
-#if 0
- } else if (this->fails_as_input_p == true) {
- if (this->fp_unpaired_mult_xs_1 == NULL) {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".unpaired_mult_xs.1.fq")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_mult_xs.1.fq",this->sevenway_root);
- if ((this->fp_unpaired_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
- }
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".unpaired_mult_xs.2.fq")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_mult_xs.2.fq",this->sevenway_root);
- if ((this->fp_unpaired_mult_xs_2 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-#endif
-
- } else {
- if (this->fp_unpaired_mult_xs_1 == NULL) {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".unpaired_mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_mult_xs",this->sevenway_root);
- if ((this->fp_unpaired_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->output_sam_p == true && this->sam_headers_p == true) {
- SAM_header_print_HD(this->fp_unpaired_mult_xs_1,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_unpaired_mult_xs_1,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_unpaired_mult_xs_1,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- }
- }
- }
-
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".halfmapping_uniq")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_uniq",this->sevenway_root);
- if ((this->fp_halfmapping_uniq = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".halfmapping_circular")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_circular",this->sevenway_root);
- if ((this->fp_halfmapping_circular = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".halfmapping_transloc")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_transloc",this->sevenway_root);
- if ((this->fp_halfmapping_transloc = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".halfmapping_mult")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_mult",this->sevenway_root);
- if ((this->fp_halfmapping_mult = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->quiet_if_excessive_p == false) {
- this->fp_halfmapping_mult_xs_1 = (FILE *) NULL;
- this->fp_halfmapping_mult_xs_2 = (FILE *) NULL;
-
-#if 0
- } else if (this->fails_as_input_p == true) {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".halfmapping_mult_xs.1.fq")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_mult_xs.1.fq",this->sevenway_root);
- if ((this->fp_halfmapping_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".halfmapping_mult_xs.2.fq")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_mult_xs.2.fq",this->sevenway_root);
- if ((this->fp_halfmapping_mult_xs_2 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-#endif
-
- } else {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".halfmapping_mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_mult_xs",this->sevenway_root);
- if ((this->fp_halfmapping_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->output_sam_p == true && this->sam_headers_p == true) {
- SAM_header_print_HD(this->fp_halfmapping_mult_xs_1,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_halfmapping_mult_xs_1,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_halfmapping_mult_xs_1,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- }
- }
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".paired_uniq_circular")+1,sizeof(char));
- sprintf(filename,"%s.paired_uniq_circular",this->sevenway_root);
- if ((this->fp_paired_uniq_circular = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".paired_uniq_inv")+1,sizeof(char));
- sprintf(filename,"%s.paired_uniq_inv",this->sevenway_root);
- if ((this->fp_paired_uniq_inv = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".paired_uniq_scr")+1,sizeof(char));
- sprintf(filename,"%s.paired_uniq_scr",this->sevenway_root);
- if ((this->fp_paired_uniq_scr = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".paired_uniq_long")+1,sizeof(char));
- sprintf(filename,"%s.paired_uniq_long",this->sevenway_root);
- if ((this->fp_paired_uniq_long = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".paired_mult")+1,sizeof(char));
- sprintf(filename,"%s.paired_mult",this->sevenway_root);
- if ((this->fp_paired_mult = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->quiet_if_excessive_p == false) {
- this->fp_paired_mult_xs_1 = (FILE *) NULL;
- this->fp_paired_mult_xs_2 = (FILE *) NULL;
-
-#if 0
- } else if (this->fails_as_input_p == true) {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".paired_mult_xs.1.fq")+1,sizeof(char));
- sprintf(filename,"%s.paired_mult_xs.1.fq",this->sevenway_root);
- if ((this->fp_paired_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".paired_mult_xs.2.fq")+1,sizeof(char));
- sprintf(filename,"%s.paired_mult_xs.2.fq",this->sevenway_root);
- if ((this->fp_paired_mult_xs_2 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-#endif
-
- } else {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".paired_mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.paired_mult_xs",this->sevenway_root);
- if ((this->fp_paired_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->output_sam_p == true && this->sam_headers_p == true) {
- SAM_header_print_HD(this->fp_paired_mult_xs_1,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_paired_mult_xs_1,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_paired_mult_xs_1,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- }
- }
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".concordant_uniq")+1,sizeof(char));
- sprintf(filename,"%s.concordant_uniq",this->sevenway_root);
- if ((this->fp_concordant_uniq = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".concordant_circular")+1,sizeof(char));
- sprintf(filename,"%s.concordant_circular",this->sevenway_root);
- if ((this->fp_concordant_circular = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".concordant_transloc")+1,sizeof(char));
- sprintf(filename,"%s.concordant_transloc",this->sevenway_root);
- if ((this->fp_concordant_transloc = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".concordant_mult")+1,sizeof(char));
- sprintf(filename,"%s.concordant_mult",this->sevenway_root);
- if ((this->fp_concordant_mult = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->quiet_if_excessive_p == false) {
- this->fp_concordant_mult_xs_1 = (FILE *) NULL;
- this->fp_concordant_mult_xs_2 = (FILE *) NULL;
-
-#if 0
- } else if (this->fails_as_input_p == true) {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".concordant_mult_xs.1.fq")+1,sizeof(char));
- sprintf(filename,"%s.concordant_mult_xs.1.fq",this->sevenway_root);
- if ((this->fp_concordant_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".concordant_mult_xs.2.fq")+1,sizeof(char));
- sprintf(filename,"%s.concordant_mult_xs.2.fq",this->sevenway_root);
- if ((this->fp_concordant_mult_xs_2 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-#endif
-
- } else {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".concordant_mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.concordant_mult_xs",this->sevenway_root);
- if ((this->fp_concordant_mult_xs_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->output_sam_p == true && this->sam_headers_p == true) {
- SAM_header_print_HD(this->fp_concordant_mult_xs_1,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_concordant_mult_xs_1,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_concordant_mult_xs_1,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- }
- }
-
- if (this->output_sam_p == true && this->sam_headers_p == true) {
- SAM_header_print_HD(this->fp_halfmapping_uniq,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_halfmapping_uniq,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_halfmapping_uniq,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_halfmapping_circular,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_halfmapping_circular,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_halfmapping_circular,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_halfmapping_transloc,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_halfmapping_transloc,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_halfmapping_transloc,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_halfmapping_mult,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_halfmapping_mult,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_halfmapping_mult,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_paired_uniq_circular,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_paired_uniq_circular,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_paired_uniq_circular,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_paired_uniq_inv,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_paired_uniq_inv,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_paired_uniq_inv,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_paired_uniq_scr,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_paired_uniq_scr,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_paired_uniq_scr,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_paired_uniq_long,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_paired_uniq_long,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_paired_uniq_long,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_paired_mult,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_paired_mult,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_paired_mult,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_concordant_uniq,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_concordant_uniq,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_concordant_uniq,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_concordant_circular,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_concordant_circular,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_concordant_circular,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_concordant_transloc,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_concordant_transloc,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_concordant_transloc,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_concordant_mult,this->nworkers,this->orderedp);
- SAM_header_print_PG(this->fp_concordant_mult,this->argc,this->argv,this->optind);
- Univ_IIT_dump_sam(this->fp_concordant_mult,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- }
-
- if (this->output_sam_p == true) {
- SAM_file_setup_paired(this->fp_failedinput_1,this->fp_failedinput_2,this->fp_nomapping,
- this->fp_halfmapping_uniq,this->fp_halfmapping_circular,
- this->fp_halfmapping_transloc,this->fp_halfmapping_mult,
- this->fp_halfmapping_mult_xs_1,this->fp_halfmapping_mult_xs_2,
- this->fp_paired_uniq_circular,this->fp_paired_uniq_inv,this->fp_paired_uniq_scr,
- this->fp_paired_uniq_long,this->fp_paired_mult,
- this->fp_paired_mult_xs_1,this->fp_paired_mult_xs_2,
- this->fp_concordant_uniq,this->fp_concordant_circular,
- this->fp_concordant_transloc,this->fp_concordant_mult,
- this->fp_concordant_mult_xs_1,this->fp_concordant_mult_xs_2);
-
- } else {
- Stage3hr_file_setup_paired(this->fp_failedinput_1,this->fp_failedinput_2,this->fp_nomapping,
- this->fp_halfmapping_uniq,this->fp_halfmapping_circular,
- this->fp_halfmapping_transloc,this->fp_halfmapping_mult,
- this->fp_halfmapping_mult_xs_1,this->fp_halfmapping_mult_xs_2,
- this->fp_paired_uniq_circular,this->fp_paired_uniq_inv,this->fp_paired_uniq_scr,
- this->fp_paired_uniq_long,this->fp_paired_mult,
- this->fp_paired_mult_xs_1,this->fp_paired_mult_xs_2,
- this->fp_concordant_uniq,this->fp_concordant_circular,
- this->fp_concordant_transloc,this->fp_concordant_mult,
- this->fp_concordant_mult_xs_1,this->fp_concordant_mult_xs_2);
- }
-
- return;
-}
-
-static void
-sevenway_close (T this) {
- fclose(this->fp_unpaired_uniq);
- fclose(this->fp_unpaired_circular);
- fclose(this->fp_unpaired_transloc);
- fclose(this->fp_unpaired_mult);
- if (this->quiet_if_excessive_p == true) {
- fclose(this->fp_unpaired_mult_xs_1);
- if (this->fp_unpaired_mult_xs_2 != NULL) {
- fclose(this->fp_unpaired_mult_xs_2);
- }
- }
- if (this->fp_nomapping != NULL) {
- fclose(this->fp_nomapping);
- }
- if (this->fp_halfmapping_uniq != NULL) {
- /* Paired output */
- fclose(this->fp_halfmapping_uniq);
- fclose(this->fp_halfmapping_circular);
- fclose(this->fp_halfmapping_transloc);
- fclose(this->fp_halfmapping_mult);
- fclose(this->fp_paired_uniq_long);
- fclose(this->fp_paired_uniq_scr);
- fclose(this->fp_paired_uniq_inv);
- fclose(this->fp_paired_uniq_circular);
- fclose(this->fp_paired_mult);
- fclose(this->fp_concordant_uniq);
- fclose(this->fp_concordant_circular);
- fclose(this->fp_concordant_transloc);
- fclose(this->fp_concordant_mult);
-
- if (this->quiet_if_excessive_p == true) {
- fclose(this->fp_halfmapping_mult_xs_1);
- fclose(this->fp_paired_mult_xs_1);
- fclose(this->fp_concordant_mult_xs_1);
- if (this->fp_halfmapping_mult_xs_2 != NULL) {
- fclose(this->fp_halfmapping_mult_xs_2);
- }
- if (this->fp_paired_mult_xs_2 != NULL) {
- fclose(this->fp_paired_mult_xs_2);
- }
- if (this->fp_concordant_mult_xs_2 != NULL) {
- fclose(this->fp_concordant_mult_xs_2);
- }
- }
- }
-
- return;
-}
-
-#else
-
-/* GMAP version */
-
-static void
-print_gff_header (FILE *fp, int argc, char **argv, int optind) {
- char **argstart;
- int c;
-
- fprintf(fp,"##gff-version 3\n");
- fprintf(fp,"# Generated by GMAP version %s using call: ",PACKAGE_VERSION);
- argstart = &(argv[-optind]);
- for (c = 0; c < argc + optind; c++) {
- fprintf(fp," %s",argstart[c]);
- }
- fprintf(fp,"\n");
- return;
-}
-
-
-/* Taken from Univ_IIT_dump_sam */
-static void
-dump_sam_usersegment (FILE *fp, Sequence_T usersegment,
- char *sam_read_group_id, char *sam_read_group_name,
- char *sam_read_group_library, char *sam_read_group_platform) {
-
- fprintf(fp,"@SQ\tSN:%s",Sequence_accession(usersegment));
- fprintf(fp,"\tLN:%u",Sequence_fulllength(usersegment));
- fprintf(fp,"\n");
-
- if (sam_read_group_id != NULL) {
- fprintf(fp,"@RG\tID:%s",sam_read_group_id);
- if (sam_read_group_platform != NULL) {
- fprintf(fp,"\tPL:%s",sam_read_group_platform);
- }
- if (sam_read_group_library != NULL) {
- fprintf(fp,"\tLB:%s",sam_read_group_library);
- }
- fprintf(fp,"\tSM:%s",sam_read_group_name);
- fprintf(fp,"\n");
- }
-
- return;
-}
-
-
-static void
-failedinput_open (T this) {
- char *filename;
- char *write_mode;
-
- if (this->appendp == true) {
- write_mode = "a";
- } else {
- write_mode = "w";
- }
-
- if (this->failedinput_root != NULL) {
- filename = (char *) CALLOC(strlen(this->failedinput_root)+1,sizeof(char));
- sprintf(filename,"%s",this->failedinput_root);
- if ((this->fp_failedinput_1 = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
- }
-
- return;
-}
-
-
-static void
-sevenway_open (T this, int nworkers, bool orderedp, int argc, char **argv, int optind) {
- char *filename;
- char *write_mode;
-
- if (this->appendp == true) {
- write_mode = "a";
- } else {
- write_mode = "w";
- }
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".nomapping")+1,sizeof(char));
- sprintf(filename,"%s.nomapping",this->sevenway_root);
- if ((this->fp_nomapping = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".uniq")+1,sizeof(char));
- sprintf(filename,"%s.uniq",this->sevenway_root);
- if ((this->fp_uniq = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".circular")+1,sizeof(char));
- sprintf(filename,"%s.circular",this->sevenway_root);
- if ((this->fp_circular = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->chimeras_allowed_p == true) {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".transloc")+1,sizeof(char));
- sprintf(filename,"%s.transloc",this->sevenway_root);
- if ((this->fp_transloc = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
- }
-
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".mult")+1,sizeof(char));
- sprintf(filename,"%s.mult",this->sevenway_root);
- if ((this->fp_mult = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- if (this->quiet_if_excessive_p == false) {
- this->fp_mult_xs = (FILE *) NULL;
- } else {
- filename = (char *) CALLOC(strlen(this->sevenway_root)+strlen(".mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.mult_xs",this->sevenway_root);
- if ((this->fp_mult_xs = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
- }
-
- if (this->printtype == GFF3_GENE || this->printtype == GFF3_MATCH_CDNA || this->printtype == GFF3_MATCH_EST) {
- print_gff_header(this->fp_nomapping,argc,argv,optind);
- print_gff_header(this->fp_uniq,argc,argv,optind);
- print_gff_header(this->fp_circular,argc,argv,optind);
- print_gff_header(this->fp_mult,argc,argv,optind);
- if (this->quiet_if_excessive_p == true) {
- print_gff_header(this->fp_mult_xs,argc,argv,optind);
- }
-
-#ifndef PMAP
- } else if (this->printtype == SAM && this->sam_headers_p == true) {
- if (this->usersegment != NULL) {
- dump_sam_usersegment(this->fp_nomapping,this->usersegment,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- dump_sam_usersegment(this->fp_uniq,this->usersegment,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- dump_sam_usersegment(this->fp_circular,this->usersegment,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- dump_sam_usersegment(this->fp_mult,this->usersegment,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- if (this->quiet_if_excessive_p == true) {
- dump_sam_usersegment(this->fp_mult_xs,this->usersegment,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- }
-
- } else {
- SAM_header_print_HD(this->fp_nomapping,nworkers,orderedp);
- SAM_header_print_PG(this->fp_nomapping,argc,argv,optind);
- Univ_IIT_dump_sam(this->fp_nomapping,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_uniq,nworkers,orderedp);
- SAM_header_print_PG(this->fp_uniq,argc,argv,optind);
- Univ_IIT_dump_sam(this->fp_uniq,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_circular,nworkers,orderedp);
- SAM_header_print_PG(this->fp_circular,argc,argv,optind);
- Univ_IIT_dump_sam(this->fp_circular,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- SAM_header_print_HD(this->fp_mult,nworkers,orderedp);
- SAM_header_print_PG(this->fp_mult,argc,argv,optind);
- Univ_IIT_dump_sam(this->fp_mult,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- if (this->quiet_if_excessive_p == true) {
- SAM_header_print_HD(this->fp_mult_xs,nworkers,orderedp);
- SAM_header_print_PG(this->fp_mult_xs,argc,argv,optind);
- Univ_IIT_dump_sam(this->fp_mult_xs,this->chromosome_iit,
- this->sam_read_group_id,this->sam_read_group_name,
- this->sam_read_group_library,this->sam_read_group_platform);
- }
- }
-#endif
- }
-
- return;
-}
-
-static void
-sevenway_close (T this) {
- if (this->quiet_if_excessive_p == true) {
- fclose(this->fp_mult_xs);
- }
- fclose(this->fp_mult);
- fclose(this->fp_circular);
- fclose(this->fp_uniq);
- if (this->chimeras_allowed_p == true) {
- fclose(this->fp_transloc);
- }
- fclose(this->fp_nomapping);
- return;
-}
-
-#endif
-
-
-
-#ifdef GSNAP
-
-T
-Outbuffer_new (unsigned int output_buffer_size, unsigned int nread, char *sevenway_root, char *failedinput_root,
- bool appendp, Univ_IIT_T chromosome_iit, bool timingp,
- bool output_sam_p, bool sam_headers_p, char *sam_read_group_id, char *sam_read_group_name,
- char *sam_read_group_library, char *sam_read_group_platform,
- int nworkers, bool orderedp, Gobywriter_T gobywriter, bool nofailsp, bool failsonlyp,
- bool fastq_format_p, bool clip_overlap_p, bool merge_overlap_p, bool merge_samechr_p, bool print_m8_p,
- int maxpaths_report, bool quiet_if_excessive_p, int quality_shift,
- bool invert_first_p, bool invert_second_p, Chrpos_T pairmax,
- int argc, char **argv, int optind) {
- T new = (T) MALLOC(sizeof(*new));
- FILE *fp_capture = NULL, *fp_ignore = NULL;
-
- new->chromosome_iit = chromosome_iit;
-
- new->fp_failedinput_1 = NULL;
- new->fp_failedinput_2 = NULL;
-
- new->fp_nomapping = NULL;
- new->fp_halfmapping_uniq = NULL;
- new->fp_halfmapping_circular = NULL;
- new->fp_halfmapping_transloc = NULL;
- new->fp_halfmapping_mult = NULL;
- new->fp_halfmapping_mult_xs_1 = NULL;
- new->fp_halfmapping_mult_xs_2 = NULL;
- new->fp_unpaired_uniq = NULL;
- new->fp_unpaired_circular = NULL;
- new->fp_unpaired_transloc = NULL;
- new->fp_unpaired_mult = NULL;
- new->fp_unpaired_mult_xs_1 = NULL;
- new->fp_unpaired_mult_xs_2 = NULL;
- new->fp_paired_uniq_circular = NULL;
- new->fp_paired_uniq_inv = NULL;
- new->fp_paired_uniq_scr = NULL;
- new->fp_paired_uniq_long = NULL;
- new->fp_paired_mult = NULL;
- new->fp_paired_mult_xs_1 = NULL;
- new->fp_paired_mult_xs_2 = NULL;
- new->fp_concordant_uniq = NULL;
- new->fp_concordant_circular = NULL;
- new->fp_concordant_transloc = NULL;
- new->fp_concordant_mult = NULL;
- new->fp_concordant_mult_xs_1 = NULL;
- new->fp_concordant_mult_xs_2 = NULL;
-
- new->sevenway_root = sevenway_root;
- new->failedinput_root = failedinput_root;
- new->appendp = appendp;
-
- new->timingp = timingp;
- new->output_sam_p = output_sam_p;
- new->sam_headers_p = sam_headers_p;
- new->sam_read_group_id = sam_read_group_id;
- new->sam_read_group_name = sam_read_group_name;
- new->sam_read_group_library = sam_read_group_library;
- new->sam_read_group_platform = sam_read_group_platform;
- new->nworkers = nworkers;
- new->orderedp = orderedp;
- new->argc = argc;
- new->argv = argv;
- new->optind = optind;
-
- new->gobywriter = gobywriter;
-
- new->nofailsp = nofailsp;
- new->failsonlyp = failsonlyp;
- new->fastq_format_p = fastq_format_p;
- new->clip_overlap_p = clip_overlap_p;
- new->merge_overlap_p = merge_overlap_p;
- new->merge_samechr_p = merge_samechr_p;
- new->print_m8_p = print_m8_p;
-
- new->maxpaths_report = maxpaths_report;
- new->quiet_if_excessive_p = quiet_if_excessive_p;
-
- new->quality_shift = quality_shift;
- new->invert_first_p = invert_first_p;
- new->invert_second_p = invert_second_p;
- new->pairmax = pairmax;
-
-#ifdef HAVE_PTHREAD
- pthread_mutex_init(&new->lock,NULL);
-#endif
-
- new->output_buffer_size = output_buffer_size;
- new->nread = nread;
- new->ntotal = (unsigned int) -1U; /* Set to infinity until all reads are input */
- new->nprocessed = 0;
+ new->nbeyond = 0;
+ new->nprocessed = 0;
new->head = (RRlist_T) NULL;
new->tail = (RRlist_T) NULL;
#ifdef HAVE_PTHREAD
- pthread_cond_init(&new->result_avail_p,NULL);
-#endif
-
- /* Initialize output streams */
- if (new->gobywriter != NULL) {
- Goby_file_handles(&fp_capture,&fp_ignore,new->gobywriter);
- new->fp_nomapping = fp_ignore;
- new->fp_halfmapping_uniq = fp_capture;
- new->fp_halfmapping_circular = fp_capture;
- new->fp_halfmapping_transloc = fp_capture;
- new->fp_halfmapping_mult = fp_capture;
- new->fp_halfmapping_mult_xs_1 = fp_capture;
- new->fp_halfmapping_mult_xs_2 = fp_capture;
- new->fp_unpaired_uniq = fp_capture;
- new->fp_unpaired_circular = fp_capture;
- new->fp_unpaired_transloc = fp_capture;
- new->fp_unpaired_mult = fp_capture;
- new->fp_unpaired_mult_xs_1 = fp_capture;
- new->fp_unpaired_mult_xs_2 = fp_capture;
- new->fp_paired_uniq_circular = fp_capture;
- new->fp_paired_uniq_inv = fp_capture;
- new->fp_paired_uniq_scr = fp_capture;
- new->fp_paired_uniq_long = fp_capture;
- new->fp_paired_mult = fp_capture;
- new->fp_paired_mult_xs_1 = fp_capture;
- new->fp_paired_mult_xs_2 = fp_capture;
- new->fp_concordant_uniq = fp_capture;
- new->fp_concordant_circular = fp_capture;
- new->fp_concordant_transloc = fp_capture;
- new->fp_concordant_mult = fp_capture;
- new->fp_concordant_mult_xs_1 = fp_capture;
- new->fp_concordant_mult_xs_2 = fp_capture;
-
- if (output_sam_p == true) {
- SAM_file_setup_all(new->fp_failedinput_1,new->fp_failedinput_2,new->fp_nomapping,
- new->fp_unpaired_uniq,new->fp_unpaired_circular,
- new->fp_unpaired_transloc,new->fp_unpaired_mult,
- new->fp_unpaired_mult_xs_1,new->fp_unpaired_mult_xs_2,
- new->fp_halfmapping_uniq,new->fp_halfmapping_circular,
- new->fp_halfmapping_transloc,new->fp_halfmapping_mult,
- new->fp_halfmapping_mult_xs_1,new->fp_halfmapping_mult_xs_2,
- new->fp_paired_uniq_circular,new->fp_paired_uniq_inv,new->fp_paired_uniq_scr,
- new->fp_paired_uniq_long,new->fp_paired_mult,
- new->fp_paired_mult_xs_1,new->fp_paired_mult_xs_2,
- new->fp_concordant_uniq,new->fp_concordant_circular,
- new->fp_concordant_transloc,new->fp_concordant_mult,
- new->fp_concordant_mult_xs_1,new->fp_concordant_mult_xs_2);
-
- } else {
- Stage3hr_file_setup_all(new->fp_failedinput_1,new->fp_failedinput_2,new->fp_nomapping,
- new->fp_unpaired_uniq,new->fp_unpaired_circular,
- new->fp_unpaired_transloc,new->fp_unpaired_mult,
- new->fp_unpaired_mult_xs_1,new->fp_unpaired_mult_xs_2,
- new->fp_halfmapping_uniq,new->fp_halfmapping_circular,
- new->fp_halfmapping_transloc,new->fp_halfmapping_mult,
- new->fp_halfmapping_mult_xs_1,new->fp_halfmapping_mult_xs_2,
- new->fp_paired_uniq_circular,new->fp_paired_uniq_inv,new->fp_paired_uniq_scr,
- new->fp_paired_uniq_long,new->fp_paired_mult,
- new->fp_paired_mult_xs_1,new->fp_paired_mult_xs_2,
- new->fp_concordant_uniq,new->fp_concordant_circular,
- new->fp_concordant_transloc,new->fp_concordant_mult,
- new->fp_concordant_mult_xs_1,new->fp_concordant_mult_xs_2);
- }
-
- } else {
- if (failedinput_root != NULL) {
- failedinput_open_paired(new);
- }
-
- if (sevenway_root != NULL) {
- sevenway_open_single(new);
-
- } else {
- new->fp_nomapping = stdout;
- new->fp_halfmapping_uniq = stdout;
- new->fp_halfmapping_circular = stdout;
- new->fp_halfmapping_transloc = stdout;
- new->fp_halfmapping_mult = stdout;
- new->fp_halfmapping_mult_xs_1 = stdout;
- new->fp_halfmapping_mult_xs_2 = stdout;
- new->fp_unpaired_uniq = stdout;
- new->fp_unpaired_circular = stdout;
- new->fp_unpaired_transloc = stdout;
- new->fp_unpaired_mult = stdout;
- new->fp_unpaired_mult_xs_1 = stdout;
- new->fp_unpaired_mult_xs_2 = stdout;
- new->fp_paired_uniq_circular = stdout;
- new->fp_paired_uniq_inv = stdout;
- new->fp_paired_uniq_scr = stdout;
- new->fp_paired_uniq_long = stdout;
- new->fp_paired_mult = stdout;
- new->fp_paired_mult_xs_1 = stdout;
- new->fp_paired_mult_xs_2 = stdout;
- new->fp_concordant_uniq = stdout;
- new->fp_concordant_circular = stdout;
- new->fp_concordant_transloc = stdout;
- new->fp_concordant_mult = stdout;
- new->fp_concordant_mult_xs_1 = stdout;
- new->fp_concordant_mult_xs_2 = stdout;
-
- if (output_sam_p == true) {
- SAM_file_setup_all(new->fp_failedinput_1,new->fp_failedinput_2,new->fp_nomapping,
- new->fp_unpaired_uniq,new->fp_unpaired_circular,
- new->fp_unpaired_transloc,new->fp_unpaired_mult,
- new->fp_unpaired_mult_xs_1,new->fp_unpaired_mult_xs_2,
- new->fp_halfmapping_uniq,new->fp_halfmapping_circular,
- new->fp_halfmapping_transloc,new->fp_halfmapping_mult,
- new->fp_halfmapping_mult_xs_1,new->fp_halfmapping_mult_xs_2,
- new->fp_paired_uniq_circular,new->fp_paired_uniq_inv,new->fp_paired_uniq_scr,
- new->fp_paired_uniq_long,new->fp_paired_mult,
- new->fp_paired_mult_xs_1,new->fp_paired_mult_xs_2,
- new->fp_concordant_uniq,new->fp_concordant_circular,
- new->fp_concordant_transloc,new->fp_concordant_mult,
- new->fp_concordant_mult_xs_1,new->fp_concordant_mult_xs_2);
-
- } else {
- Stage3hr_file_setup_all(new->fp_failedinput_1,new->fp_failedinput_2,new->fp_nomapping,
- new->fp_unpaired_uniq,new->fp_unpaired_circular,
- new->fp_unpaired_transloc,new->fp_unpaired_mult,
- new->fp_unpaired_mult_xs_1,new->fp_unpaired_mult_xs_2,
- new->fp_halfmapping_uniq,new->fp_halfmapping_circular,
- new->fp_halfmapping_transloc,new->fp_halfmapping_mult,
- new->fp_halfmapping_mult_xs_1,new->fp_halfmapping_mult_xs_2,
- new->fp_paired_uniq_circular,new->fp_paired_uniq_inv,new->fp_paired_uniq_scr,
- new->fp_paired_uniq_long,new->fp_paired_mult,
- new->fp_paired_mult_xs_1,new->fp_paired_mult_xs_2,
- new->fp_concordant_uniq,new->fp_concordant_circular,
- new->fp_concordant_transloc,new->fp_concordant_mult,
- new->fp_concordant_mult_xs_1,new->fp_concordant_mult_xs_2);
- }
-
- if (output_sam_p == true && sam_headers_p == true) {
- SAM_header_print_HD(stdout,nworkers,orderedp);
- SAM_header_print_PG(stdout,argc,argv,optind);
- Univ_IIT_dump_sam(stdout,chromosome_iit,sam_read_group_id,sam_read_group_name,
- sam_read_group_library,sam_read_group_platform);
- }
- }
- }
+ pthread_cond_init(&new->filestring_avail_p,NULL);
+#endif
return new;
}
-#else
-
-T
-Outbuffer_new (unsigned int output_buffer_size, unsigned int nread, char *sevenway_root, char *failedinput_root,
- bool appendp, bool chimeras_allowed_p, char *user_genomicseg, Sequence_T usersegment,
- char *dbversion, Genome_T genome, Univ_IIT_T chromosome_iit,
- char *chrsubset_name, Univ_IIT_T contig_iit, IIT_T altstrain_iit, IIT_T map_iit,
- int *map_divint_crosstable, Printtype_T printtype, bool checksump, int chimera_margin,
-#ifndef PMAP
- bool sam_headers_p, int quality_shift, bool sam_paired_p,
- char *sam_read_group_id, char *sam_read_group_name,
- char *sam_read_group_library, char *sam_read_group_platform,
- int nworkers, bool orderedp,
-#endif
- bool nofailsp, bool failsonlyp, int maxpaths_report, bool quiet_if_excessive_p,
- bool map_exons_p, bool map_bothstrands_p, bool print_comment_p, int nflanking,
- int proteinmode, int invertmode, bool nointronlenp, int wraplength,
- int ngap, int cds_startpos,
- bool fulllengthp, bool truncatep, bool strictp, bool diagnosticp, bool maponlyp,
- bool stage1debug, bool diag_debug, bool debug_graphic_p,
- int argc, char **argv, int optind) {
-
- T new = (T) MALLOC(sizeof(*new));
-
- new->chimeras_allowed_p = chimeras_allowed_p;
-
- new->user_genomicseg = user_genomicseg;
- new->usersegment = usersegment;
-
- new->dbversion = dbversion;
- new->genome = genome;
- new->chromosome_iit = chromosome_iit;
- new->chrsubset_name = chrsubset_name;
- new->contig_iit = contig_iit;
- new->altstrain_iit = altstrain_iit;
- new->map_iit = map_iit;
- new->map_divint_crosstable = map_divint_crosstable;
-
- new->printtype = printtype;
- new->checksump = checksump;
- new->chimera_margin = chimera_margin;
-
- new->sevenway_root = sevenway_root;
- new->failedinput_root = failedinput_root;
- new->appendp = appendp;
-
- new->fp_failedinput_1 = NULL;
- new->fp_failedinput_2 = NULL;
-
- new->fp_nomapping = NULL;
- new->fp_uniq = NULL;
- new->fp_circular = NULL;
- new->fp_transloc = NULL;
- new->fp_mult = NULL;
- new->fp_mult_xs = NULL;
-
-#ifndef PMAP
- new->sam_headers_p = sam_headers_p;
- new->quality_shift = quality_shift;
- new->sam_paired_p = sam_paired_p;
- new->sam_read_group_id = sam_read_group_id;
- new->sam_read_group_name = sam_read_group_name;
- new->sam_read_group_library = sam_read_group_library;
- new->sam_read_group_platform = sam_read_group_platform;
- new->nworkers = nworkers;
- new->orderedp = orderedp;
- new->argc = argc;
- new->argv = argv;
- new->optind = optind;
-#endif
-
- new->nofailsp = nofailsp;
- new->failsonlyp = failsonlyp;
- new->maxpaths_report = maxpaths_report;
- new->quiet_if_excessive_p = quiet_if_excessive_p;
-
- new->map_exons_p = map_exons_p;
- new->map_bothstrands_p = map_bothstrands_p;
- new->print_comment_p = print_comment_p;
-
- new->nflanking = nflanking;
- new->proteinmode = proteinmode;
- new->invertmode = invertmode;
- new->nointronlenp = nointronlenp;
-
- new->wraplength = wraplength;
- new->ngap = ngap;
- new->cds_startpos = cds_startpos;
-
- new->fulllengthp = fulllengthp;
- new->truncatep = truncatep;
- new->strictp = strictp;
- new->diagnosticp = diagnosticp;
- new->maponlyp = maponlyp;
-
- new->stage1debug = stage1debug;
- new->diag_debug = diag_debug;
- new->debug_graphic_p = debug_graphic_p;
-
-#ifdef HAVE_PTHREAD
- pthread_mutex_init(&new->lock,NULL);
-#endif
- new->output_buffer_size = output_buffer_size;
- new->nread = nread;
- new->ntotal = (unsigned int) -1U; /* Set to infinity until all reads are input */
- new->nprocessed = 0;
- new->head = (RRlist_T) NULL;
- new->tail = (RRlist_T) NULL;
+#ifndef USE_MPI
+/* Open empty files, and add SAM headers if SAM_HEADERS_ON_EMPTY_FILES is set */
+static void
+touch_all_single_outputs (FILE **outputs, char *split_output_root, bool appendp) {
+ SAM_split_output_type split_output;
-#ifdef HAVE_PTHREAD
- pthread_cond_init(&new->result_avail_p,NULL);
+ split_output = 1;
+ while (split_output <= N_SPLIT_OUTPUTS_SINGLE_STD) {
+ if (outputs[split_output] == NULL) {
+ outputs[split_output] = SAM_header_open_file(split_output,split_output_root,appendp);
+#ifdef SAM_HEADERS_ON_EMPTY_FILES
+ print_file_headers(outputs[split_output]);
#endif
-
- /* Initialize output streams */
- if (failedinput_root != NULL) {
- failedinput_open(new);
+ }
+ split_output++;
}
- if (sevenway_root != NULL) {
- sevenway_open(new,nworkers,orderedp,argc,argv,optind);
-
+ if (any_circular_p == false) {
+ split_output = N_SPLIT_OUTPUTS_SINGLE_TOCIRC + 1;
} else {
- new->fp_nomapping = stdout;
- new->fp_uniq = stdout;
- new->fp_circular = stdout;
- new->fp_transloc = stdout;
- new->fp_mult = stdout;
- new->fp_mult_xs = stdout;
-
- if (printtype == GFF3_GENE || printtype == GFF3_MATCH_CDNA || printtype == GFF3_MATCH_EST) {
- print_gff_header(stdout,argc,argv,optind);
-
-#ifndef PMAP
- } else if (printtype == SAM && sam_headers_p == true) {
- if (usersegment != NULL) {
- dump_sam_usersegment(stdout,usersegment,sam_read_group_id,sam_read_group_name,
- sam_read_group_library,sam_read_group_platform);
- } else {
- SAM_header_print_HD(stdout,nworkers,orderedp);
- SAM_header_print_PG(stdout,argc,argv,optind);
- Univ_IIT_dump_sam(stdout,chromosome_iit,sam_read_group_id,sam_read_group_name,
- sam_read_group_library,sam_read_group_platform);
- }
+ while (split_output <= N_SPLIT_OUTPUTS_SINGLE_TOCIRC) {
+ if (outputs[split_output] == NULL) {
+ outputs[split_output] = SAM_header_open_file(split_output,split_output_root,appendp);
+#ifdef SAM_HEADERS_ON_EMPTY_FILES
+ print_file_headers(outputs[split_output]);
#endif
-
+ }
+ split_output++;
}
}
- return new;
-}
-
+ if (quiet_if_excessive_p == true) {
+ while (split_output <= N_SPLIT_OUTPUTS_SINGLE) {
+ if (outputs[split_output] == NULL) {
+ outputs[split_output] = SAM_header_open_file(split_output,split_output_root,appendp);
+#ifdef SAM_HEADERS_ON_EMPTY_FILES
+ print_file_headers(outputs[split_output]);
#endif
-
-void
-Outbuffer_free (T *old) {
- if (*old) {
- failedinput_close(*old);
- if ((*old)->sevenway_root != NULL) {
- sevenway_close(*old);
+ }
+ split_output++;
}
-
-#ifdef HAVE_PTHREAD
- pthread_cond_destroy(&(*old)->result_avail_p);
- pthread_mutex_destroy(&(*old)->lock);
-#endif
-
- FREE(*old);
- }
- return;
-}
-
-
-
-unsigned int
-Outbuffer_nread (T this) {
- return this->nread;
-}
-
-
-
-void
-Outbuffer_add_nread (T this, unsigned int nread) {
-
-#ifdef HAVE_PTHREAD
- pthread_mutex_lock(&this->lock);
-#endif
-
- if (nread == 0) {
- /* Finished reading, so able to determine total reads in input */
- this->ntotal = this->nread;
- debug(fprintf(stderr,"__Outbuffer_add_nread added 0 reads, so setting ntotal to be %u\n",this->ntotal));
-
-#ifdef HAVE_PTHREAD
- pthread_cond_signal(&this->result_avail_p);
-#endif
-
- } else {
- this->nread += nread;
- debug(fprintf(stderr,"__Outbuffer_add_nread added %d read, now %d\n",nread,this->nread));
}
-#ifdef HAVE_PTHREAD
- pthread_mutex_unlock(&this->lock);
-#endif
-
return;
}
-
-
-void
-Outbuffer_put_result (T this, Result_T result, Request_T request) {
-
-#ifdef HAVE_PTHREAD
- pthread_mutex_lock(&this->lock);
-#endif
-
- this->tail = RRlist_push(&this->head,this->tail,request,result);
- debug1(RRlist_dump(this->head,this->tail));
- this->nprocessed += 1;
-
-#ifdef HAVE_PTHREAD
- pthread_cond_signal(&this->result_avail_p);
- pthread_mutex_unlock(&this->lock);
#endif
- return;
-}
-
-
-
-#ifdef GSNAP
-
-/************************************************************************
- * Print routines and threads for GSNAP
- ************************************************************************/
+#ifndef USE_MPI
+/* Open empty files, and add SAM headers if SAM_HEADERS_ON_EMPTY_FILES is set */
static void
-print_header_singleend (T this, FILE *fp, Request_T request, bool translocationp, int npaths) {
- Shortread_T queryseq1;
-
- if (this->print_m8_p == false) {
- queryseq1 = Request_queryseq1(request);
+touch_all_paired_outputs (FILE **outputs, char *split_output_root, bool appendp) {
+ SAM_split_output_type split_output;
- fprintf(fp,">");
- Shortread_print_oneline(fp,queryseq1);
- fprintf(fp,"\t%d",npaths);
- if (translocationp == true) {
- fprintf(fp," (transloc)");
- }
-
- /* No sequence inversion on single-end reads */
- if (Shortread_quality_string(queryseq1) != NULL) {
- fprintf(fp,"\t");
- Shortread_print_quality(fp,queryseq1,/*hardclip_low*/0,/*hardclip_high*/0,
- this->quality_shift,/*show_chopped_p*/true);
+ split_output = N_SPLIT_OUTPUTS_SINGLE + 1;
+ while (split_output <= N_SPLIT_OUTPUTS_STD) {
+ if (outputs[split_output] == NULL) {
+ outputs[split_output] = SAM_header_open_file(split_output,split_output_root,appendp);
+#ifdef SAM_HEADERS_ON_EMPTY_FILES
+ print_file_headers(outputs[split_output]);
+#endif
}
-
- fprintf(fp,"\t");
- Shortread_print_header(fp,queryseq1,/*queryseq2*/NULL);
- /* fprintf(fp,"\n"); -- included in header */
+ split_output++;
}
- return;
-}
-
-
-static void
-print_result_sam (T this, Result_T result, Request_T request) {
- Resulttype_T resulttype;
- Shortread_T queryseq1;
- Stage3end_T *stage3array, stage3;
- Chrpos_T chrpos;
- int npaths, pathnum, first_absmq, second_absmq;
- FILE *fp;
- char *abbrev;
-
- resulttype = Result_resulttype(result);
-
- if (resulttype == SINGLEEND_NOMAPPING) {
- if (this->nofailsp == true) {
- /* Skip */
- } else {
- queryseq1 = Request_queryseq1(request);
- SAM_print_nomapping(this->fp_nomapping,ABBREV_NOMAPPING_1,
- queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
- /*acc2*/NULL,this->chromosome_iit,resulttype,
- /*first_read_p*/true,/*npaths*/0,/*npaths_mate*/0,/*mate_chrpos*/0U,
- this->quality_shift,this->sam_read_group_id,this->invert_first_p,this->invert_second_p);
- if (this->failedinput_root != NULL) {
- if (this->fastq_format_p == true) {
- Shortread_print_query_singleend_fastq(this->fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
- } else {
- Shortread_print_query_singleend_fasta(this->fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
- }
- }
- }
-
- } else if (resulttype == SINGLEEND_UNIQ) {
- stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
-
- if (this->failsonlyp == true) {
- /* Skip */
- } else {
- queryseq1 = Request_queryseq1(request);
- /* Stage3end_eval_and_sort(stage3array,npaths,this->maxpaths_report,queryseq1); */
-
- stage3 = stage3array[0];
- chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
- if (Stage3end_circularpos(stage3) > 0) {
- fp = this->fp_unpaired_circular;
- abbrev = ABBREV_UNPAIRED_CIRCULAR;
- } else {
- fp = this->fp_unpaired_uniq;
- abbrev = ABBREV_UNPAIRED_UNIQ;
- }
- SAM_print(fp,abbrev,stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),/*acc2*/NULL,
- /*pathnum*/1,npaths,Stage3end_absmq_score(stage3array[0]),first_absmq,second_absmq,
- Stage3end_mapq_score(stage3array[0]),
- this->chromosome_iit,queryseq1,/*queryseq2*/NULL,
- /*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
- /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/true,/*npaths_mate*/0,this->quality_shift,
- this->sam_read_group_id,this->invert_first_p,this->invert_second_p,
- this->merge_samechr_p);
- }
-
- } else if (resulttype == SINGLEEND_TRANSLOC) {
- stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
-
- if (this->failsonlyp == true) {
- /* Skip */
-
- } else if (this->quiet_if_excessive_p && npaths > this->maxpaths_report) {
- queryseq1 = Request_queryseq1(request);
- /* Stage3end_eval_and_sort(stage3array,npaths,this->maxpaths_report,queryseq1); */
- SAM_print_nomapping(this->fp_unpaired_transloc,ABBREV_UNPAIRED_TRANSLOC,
- queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
- /*acc2*/NULL,this->chromosome_iit,resulttype,
- /*first_read_p*/true,npaths,/*npaths_mate*/0,/*mate_chrpos*/0U,
- this->quality_shift,this->sam_read_group_id,this->invert_first_p,this->invert_second_p);
-
- } else {
- queryseq1 = Request_queryseq1(request);
- /* Stage3end_eval_and_sort(stage3array,npaths,this->maxpaths_report,queryseq1); */
- for (pathnum = 1; pathnum <= npaths && pathnum <= this->maxpaths_report; pathnum++) {
-
- stage3 = stage3array[pathnum-1];
- chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
- SAM_print(this->fp_unpaired_transloc,ABBREV_UNPAIRED_TRANSLOC,
- stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
- /*acc2*/NULL,pathnum,npaths,
- Stage3end_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq,
- Stage3end_mapq_score(stage3array[pathnum-1]),
- this->chromosome_iit,queryseq1,/*queryseq2*/NULL,
- /*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
- /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/true,/*npaths_mate*/0,this->quality_shift,
- this->sam_read_group_id,this->invert_first_p,this->invert_second_p,
- this->merge_samechr_p);
+ if (any_circular_p == false) {
+ split_output = N_SPLIT_OUTPUTS_TOCIRC + 1;
+ } else {
+ while (split_output <= N_SPLIT_OUTPUTS_TOCIRC) {
+ if (outputs[split_output] == NULL) {
+ outputs[split_output] = SAM_header_open_file(split_output,split_output_root,appendp);
+#ifdef SAM_HEADERS_ON_EMPTY_FILES
+ print_file_headers(outputs[split_output]);
+#endif
}
+ split_output++;
}
+ }
- } else if (resulttype == SINGLEEND_MULT) {
- stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
-
- if (this->failsonlyp == true) {
- /* Skip */
-
- } else if (this->quiet_if_excessive_p && npaths > this->maxpaths_report) {
- queryseq1 = Request_queryseq1(request);
- /* Stage3end_eval_and_sort(stage3array,npaths,this->maxpaths_report,queryseq1); */
- SAM_print_nomapping(this->fp_unpaired_mult_xs_1,ABBREV_UNPAIRED_MULT_XS,
- queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
- /*acc2*/NULL,this->chromosome_iit,resulttype,
- /*first_read_p*/true,npaths,/*npaths_mate*/0,/*mate_chrpos*/0U,
- this->quality_shift,this->sam_read_group_id,this->invert_first_p,this->invert_second_p);
-
- } else {
- queryseq1 = Request_queryseq1(request);
- /* Stage3end_eval_and_sort(stage3array,npaths,this->maxpaths_report,queryseq1); */
- for (pathnum = 1; pathnum <= npaths && pathnum <= this->maxpaths_report; pathnum++) {
-
- stage3 = stage3array[pathnum-1];
- chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
- SAM_print(this->fp_unpaired_mult,ABBREV_UNPAIRED_MULT,
- stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
- /*acc2*/NULL,pathnum,npaths,
- Stage3end_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq,
- Stage3end_mapq_score(stage3array[pathnum-1]),
- this->chromosome_iit,queryseq1,/*queryseq2*/NULL,
- /*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
- /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
- resulttype,/*first_read_p*/true,/*npaths_mate*/0,this->quality_shift,
- this->sam_read_group_id,this->invert_first_p,this->invert_second_p,
- this->merge_samechr_p);
+ if (quiet_if_excessive_p == true) {
+ while (split_output <= N_SPLIT_OUTPUTS) {
+ if (outputs[split_output] == NULL) {
+ outputs[split_output] = SAM_header_open_file(split_output,split_output_root,appendp);
+#ifdef SAM_HEADERS_ON_EMPTY_FILES
+ print_file_headers(outputs[split_output]);
+#endif
}
+ split_output++;
}
-
- } else {
- if (this->fp_concordant_uniq == NULL) {
- sevenway_open_paired(this);
- }
- SAM_print_paired(result,resulttype,this->chromosome_iit,
- Request_queryseq1(request),Request_queryseq2(request),
- this->invert_first_p,this->invert_second_p,
- this->nofailsp,this->failsonlyp,this->clip_overlap_p,this->merge_overlap_p,
- this->merge_samechr_p,this->quality_shift,this->sam_read_group_id);
}
return;
}
-
-
-static void
-print_result_gsnap (T this, Result_T result, Request_T request) {
- Resulttype_T resulttype;
- Shortread_T queryseq1;
- Stage3end_T *stage3array, stage3;
- int npaths, pathnum, first_absmq, second_absmq;
- FILE *fp;
-
- resulttype = Result_resulttype(result);
-
- if (resulttype == SINGLEEND_NOMAPPING) {
- if (this->nofailsp == true) {
- /* Skip */
- } else if (this->print_m8_p) {
- /* Skip */
- } else {
- print_header_singleend(this,this->fp_nomapping,request,/*translocationp*/false,/*npaths*/0);
- fprintf(this->fp_nomapping,"\n");
-
- if (this->failedinput_root != NULL) {
- if (this->fastq_format_p == true) {
- queryseq1 = Request_queryseq1(request);
- Shortread_print_query_singleend_fastq(this->fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
- } else {
- queryseq1 = Request_queryseq1(request);
- Shortread_print_query_singleend_fasta(this->fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
- }
- }
- }
-
- } else if (resulttype == SINGLEEND_UNIQ) {
- stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
-
- if (this->failsonlyp == true) {
- /* Skip */
- } else {
- stage3 = stage3array[0];
- if (Stage3end_circularpos(stage3) > 0) {
- fp = this->fp_unpaired_circular;
- } else {
- fp = this->fp_unpaired_uniq;
- }
-
- print_header_singleend(this,fp,request,/*translocationp*/false,/*npaths*/1);
-
- queryseq1 = Request_queryseq1(request);
-#if 0
- Stage3end_eval_and_sort(stage3array,/*npaths*/1,this->maxpaths_report,queryseq1);
#endif
- Stage3end_print(fp,stage3,Stage3end_score(stage3),
- this->chromosome_iit,queryseq1,/*headerseq*/queryseq1,/*acc_suffix*/"",
- this->invert_first_p,/*hit5*/(Stage3end_T) NULL,/*hit3*/(Stage3end_T) NULL,
- /*pairlength*/0,/*pairscore*/0,/*pairtype*/UNPAIRED,
- Stage3end_mapq_score(stage3));
- if (this->print_m8_p == false) {
- fprintf(fp,"\n");
- }
- }
-
- } else if (resulttype == SINGLEEND_TRANSLOC) {
- stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
-
- if (this->failsonlyp == true) {
- /* Skip */
-
- } else if (this->quiet_if_excessive_p && npaths > this->maxpaths_report) {
- print_header_singleend(this,this->fp_unpaired_transloc,request,/*translocationp*/true,npaths);
- fprintf(this->fp_unpaired_transloc,"\n");
-
- } else {
- print_header_singleend(this,this->fp_unpaired_transloc,request,/*translocationp*/true,npaths);
-
- queryseq1 = Request_queryseq1(request);
-#if 0
- Stage3end_eval_and_sort(stage3array,npaths,this->maxpaths_report,queryseq1);
-#endif
- for (pathnum = 1; pathnum <= npaths && pathnum <= this->maxpaths_report; pathnum++) {
- stage3 = stage3array[pathnum-1];
- Stage3end_print(this->fp_unpaired_transloc,stage3,Stage3end_score(stage3),
- this->chromosome_iit,queryseq1,/*headerseq*/queryseq1,/*acc_suffix*/"",
- this->invert_first_p,/*hit5*/(Stage3end_T) NULL,/*hit3*/(Stage3end_T) NULL,
- /*pairlength*/0,/*pairscore*/0,/*pairtype*/UNPAIRED,
- Stage3end_mapq_score(stage3));
- }
- if (this->print_m8_p == false) {
- fprintf(this->fp_unpaired_transloc,"\n");
- }
- }
-
- } else if (resulttype == SINGLEEND_MULT) {
- stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
- if (this->failsonlyp == true) {
- /* Skip */
- } else if (this->quiet_if_excessive_p && npaths > this->maxpaths_report) {
- print_header_singleend(this,this->fp_unpaired_mult_xs_1,request,/*translocationp*/false,npaths);
- if (this->print_m8_p == false) {
- fprintf(this->fp_unpaired_mult_xs_1,"\n");
- }
-
- } else {
- print_header_singleend(this,this->fp_unpaired_mult,request,/*translocationp*/false,npaths);
-
- queryseq1 = Request_queryseq1(request);
-#if 0
- Stage3end_eval_and_sort(stage3array,npaths,this->maxpaths_report,queryseq1);
-#endif
- for (pathnum = 1; pathnum <= npaths && pathnum <= this->maxpaths_report; pathnum++) {
- stage3 = stage3array[pathnum-1];
- Stage3end_print(this->fp_unpaired_mult,stage3,Stage3end_score(stage3),
- this->chromosome_iit,queryseq1,/*headerseq*/queryseq1,/*acc_suffix*/"",
- this->invert_first_p,/*hit5*/(Stage3end_T) NULL,/*hit3*/(Stage3end_T) NULL,
- /*pairlength*/0,/*pairscore*/0,/*pairtype*/UNPAIRED,
- Stage3end_mapq_score(stage3));
- }
- if (this->print_m8_p == false) {
- fprintf(this->fp_unpaired_mult,"\n");
- }
- }
+#ifndef USE_MPI
+static bool
+paired_outputs_p (FILE **outputs) {
+ SAM_split_output_type split_output;
- } else {
- if (this->fp_concordant_uniq == NULL) {
- sevenway_open_paired(this);
+ split_output = N_SPLIT_OUTPUTS_SINGLE + 1;
+ while (split_output <= N_SPLIT_OUTPUTS) {
+ if (outputs[split_output] != NULL) {
+ return true;
}
- Stage3pair_print(result,resulttype,this->chromosome_iit,
- Request_queryseq1(request),Request_queryseq2(request),
- this->maxpaths_report,this->quiet_if_excessive_p,
-#if 0
- this->invert_first_p,this->invert_second_p,
-#endif
- this->nofailsp,this->failsonlyp,this->fastq_format_p,
- this->quality_shift);
+ split_output++;
}
- return;
+ return false;
}
+#endif
+#ifndef USE_MPI
static void
-print_result_goby (T this, Result_T result, Request_T request) {
- Resulttype_T resulttype;
- Shortread_T queryseq1;
- Stage3end_T *stage3array1, *stage3array2;
- Stage3pair_T *stage3pairarray;
- int npaths1 = 0, npaths2 = 0, first_absmq, second_absmq;
- bool output_alignment = true;
-
- resulttype = Result_resulttype(result);
- queryseq1 = Request_queryseq1(request);
- switch (resulttype) {
- /* Determine if we are in a TMH situation or some other condition where we */
- /* don't want to output the alignment. */
- case SINGLEEND_NOMAPPING:
- case PAIREDEND_NOMAPPING:
- /* Goby does nothing with no-mapping results. */
- output_alignment = false;
- break;
- case SINGLEEND_MULT:
- /* Check single end Too Many Hits (TMH) */
- stage3array1 = (Stage3end_T *) Result_array(&npaths1,&first_absmq,&second_absmq,result);
- if (npaths1 > this->maxpaths_report) {
- Goby_print_tmh(this->gobywriter,stage3array1[0],queryseq1,npaths1);
- output_alignment = false;
- }
- break;
- case SINGLEEND_UNIQ:
- case SINGLEEND_TRANSLOC:
- case CONCORDANT_UNIQ:
- case CONCORDANT_TRANSLOC:
- case UNPAIRED_UNIQ:
- case UNPAIRED_TRANSLOC:
- case PAIRED_UNIQ:
- case HALFMAPPING_UNIQ:
- /* output alignment but no need to check TMH. */
- break;
- case CONCORDANT_MULT:
- case PAIRED_MULT:
- stage3pairarray = (Stage3pair_T *) Result_array(&npaths1,&first_absmq,&second_absmq,result);
- if (npaths1 > this->maxpaths_report) {
- Goby_print_pair_tmh(this->gobywriter,resulttype,stage3pairarray[0],queryseq1,npaths1);
- output_alignment = false;
- }
- break;
- case UNPAIRED_MULT:
- case HALFMAPPING_TRANSLOC:
- case HALFMAPPING_MULT:
- stage3array1 = (Stage3end_T *) Result_array(&npaths1,&first_absmq,&second_absmq,result);
- stage3array2 = (Stage3end_T *) Result_array2(&npaths2,&first_absmq,&second_absmq,result);
- if (npaths1 >= this->maxpaths_report) {
- Goby_print_tmh(this->gobywriter,stage3array1[0],queryseq1,npaths1);
- }
- if (npaths2 >= this->maxpaths_report) {
- Goby_print_tmh(this->gobywriter,stage3array2[0],queryseq1,npaths2);
- }
- if (npaths1 >= this->maxpaths_report && npaths2 >= this->maxpaths_report) {
- output_alignment = false;
- }
- break;
- }
-
- if (output_alignment) {
- Goby_start_capture(this->gobywriter);
- print_result_gsnap(this,result,request);
- Goby_finish_capture(this->gobywriter);
+touch_all_files (FILE **outputs, char *split_output_root, bool appendp) {
+ touch_all_single_outputs(outputs,split_output_root,appendp);
+ if (paired_outputs_p(outputs) == true) {
+ touch_all_paired_outputs(outputs,split_output_root,appendp);
}
return;
}
-
-
-void
-Outbuffer_print_result (T this, Result_T result, Request_T request
-#ifdef MEMUSAGE
- , unsigned int noutput
-#endif
- ) {
- Shortread_T queryseq1;
-
- if (this->timingp == true) {
- queryseq1 = Request_queryseq1(request);
- printf("%s\t%.6f\n",Shortread_accession(queryseq1),Result_worker_runtime(result));
- } else if (this->output_sam_p == true) {
- print_result_sam(this,result,request);
- } else if (this->gobywriter != NULL) {
- print_result_goby(this,result,request);
- } else {
- print_result_gsnap(this,result,request);
- }
-
-#ifdef MEMUSAGE
- printf("Memusage of IN: %ld. Memusage of OUT: %ld. Entries in outbuffer: %d = %d processed - %u output\n",
- Mem_usage_report_in(),Mem_usage_report_out(),this->nprocessed - noutput,this->nprocessed,noutput);
#endif
- return;
-}
-#else
-/************************************************************************
- * Print routines and threads for GMAP
- ************************************************************************/
+void
+Outbuffer_close_files () {
+ SAM_split_output_type split_output;
-static void
-print_npaths (T this, FILE *fp, int npaths, Diagnostic_T diagnostic,
- char *chrsubset_name, bool mergedp, Chimera_T chimera, Failure_T failuretype) {
+ if (failedinput_root != NULL) {
+#ifdef USE_MPI
+#ifdef GSNAP
+ MPI_File_close(&output_failedinput_1);
+ MPI_File_close(&output_failedinput_2);
+#else
+ MPI_File_close(&output_failedinput);
+#endif
+
+#else
+#ifdef GSNAP
+ fclose(output_failedinput_1);
+ fclose(output_failedinput_2);
+#else
+ fclose(output_failedinput);
+#endif
+#endif
- if (this->diagnosticp == true) {
- Diagnostic_print(diagnostic);
}
- if (npaths == 0) {
- fprintf(fp,"Paths (0):");
- } else if (mergedp == true) {
- fprintf(fp,"Paths (1):");
+#ifdef USE_MPI
+ if (split_output_root != NULL) {
+ for (split_output = 1; split_output <= N_SPLIT_OUTPUTS; split_output++) {
+ MPI_File_close(&(outputs[split_output]));
+ }
+ } else if (output_file != NULL) {
+ MPI_File_close(&(outputs[0]));
} else {
- fprintf(fp,"Paths (%d):",npaths);
- }
- if (chrsubset_name != NULL) {
- printf(" [chrsubset: %s]",chrsubset_name);
+ /* Wrote to stdout */
}
- if (failuretype == NO_FAILURE) {
- if (chimera != NULL) {
- Chimera_print(fp,chimera);
+
+#else
+ if (split_output_root != NULL) {
+ touch_all_files(outputs,split_output_root,appendp);
+
+ for (split_output = 1; split_output <= N_SPLIT_OUTPUTS; split_output++) {
+ if (outputs[split_output] != NULL) {
+ fclose(outputs[split_output]);
+ }
}
- } else if (failuretype == EMPTY_SEQUENCE) {
- fprintf(fp," *** Empty sequence ***");
- } else if (failuretype == SHORT_SEQUENCE) {
- fprintf(fp," *** Short sequence < index oligo size ***");
- } else if (failuretype == POOR_SEQUENCE) {
- fprintf(fp," *** Poor sequence (use -p flag to change pruning behavior) ***");
- } else if (failuretype == REPETITIVE) {
- fprintf(fp," *** Repetitive sequence (use -p flag to change pruning behavior) ***");
- }
- fprintf(fp,"\n");
- if (npaths == 0) {
- fprintf(fp,"\n");
+ } else if (output_file != NULL) {
+ fclose(outputs[0]);
+ } else {
+ /* Wrote to stdout */
}
+#endif
+
+ FREE_KEEP(outputs);
+
return;
}
void
-Outbuffer_print_result (T this, Result_T result, Request_T request, Sequence_T headerseq
-#ifdef MEMUSAGE
- , unsigned int noutput
-#endif
- ) {
- FILE *fp;
- char *abbrev;
- Sequence_T queryseq;
- Diagnostic_T diagnostic;
- Stage3_T *stage3array;
- int npaths, pathnum, effective_maxpaths, first_absmq, second_absmq;
- Chimera_T chimera = NULL;
- int chimerapos, chimeraequivpos, chimera_cdna_direction;
- int querylength;
- double donor_prob, acceptor_prob;
- List_T p;
- Gregion_T gregion;
- bool printp, mergedp = false;
-#ifdef MEMUSAGE
- char *comma0, *comma1, *comma2, *comma3;
-#endif
-
- queryseq = Request_queryseq(request);
-
- if (this->stage1debug == true) {
- putc('>',stdout);
- Sequence_print_header(stdout,headerseq,this->checksump);
-
- for (p = Result_gregionlist(result); p != NULL; p = List_next(p)) {
- gregion = (Gregion_T) List_head(p);
- Gregion_print(gregion);
- }
- return;
+Outbuffer_free (T *old) {
- } else if (this->diag_debug == true) {
- putc('>',stdout);
- Sequence_print_header(stdout,headerseq,this->checksump);
+ if (*old) {
+#ifdef HAVE_PTHREAD
+ pthread_cond_destroy(&(*old)->filestring_avail_p);
+ pthread_mutex_destroy(&(*old)->lock);
+#endif
- Diag_print_segments(Result_diagonals(result),/*queryseq_ptr*/NULL,/*genomicseg_ptr*/NULL);
- return;
+ FREE_KEEP(*old);
}
- stage3array = Result_array(&npaths,&first_absmq,&second_absmq,result);
- querylength = Sequence_fulllength_given(queryseq);
-
- chimerapos = chimeraequivpos = -1;
- chimera_cdna_direction = 0;
- donor_prob = acceptor_prob = 0.0;
+ return;
+}
- /* Translation */
- if (npaths == 0) {
- effective_maxpaths = 0;
- fp = this->fp_nomapping;
- abbrev = ABBREV_NOMAPPING_1;
- if (this->nofailsp == true) {
- printp = false;
- } else {
- printp = true;
- }
+unsigned int
+Outbuffer_nread (T this) {
+ return this->nread;
+}
- if (Result_failuretype(result) == POOR_SEQUENCE) {
- fprintf(stderr,"Accession %s skipped (poor sequence). Use -p flag to change pruning behavior\n",Sequence_accession(headerseq));
- } else if (Result_failuretype(result) == REPETITIVE) {
- fprintf(stderr,"Accession %s skipped (repetitive sequence). Use -p flag to change pruning behavior\n",Sequence_accession(headerseq));
- } else {
- fprintf(stderr,"No paths found for %s\n",Sequence_accession(headerseq));
- }
+unsigned int
+Outbuffer_nbeyond (T this) {
+ return this->nbeyond;
+}
- } else if ((mergedp = Result_mergedp(result)) == true) {
- if (Stage3_circularpos(stage3array[0]) > 0) {
- fp = this->fp_circular;
- abbrev = ABBREV_UNPAIRED_CIRCULAR;
- } else {
- fp = this->fp_uniq;
- abbrev = ABBREV_UNPAIRED_UNIQ;
- }
- effective_maxpaths = 1;
- if (this->failsonlyp == true) {
- printp = false;
- } else {
- printp = true;
+void
+Outbuffer_add_nread (T this, unsigned int nread) {
- for (pathnum = 1; pathnum <= /*effective_maxpaths*/1; pathnum++) {
- Stage3_translate(stage3array[pathnum-1],
-#ifdef PMAP
- queryseq,this->diagnosticp,
+#ifdef HAVE_PTHREAD
+ pthread_mutex_lock(&this->lock);
#endif
- querylength,this->fulllengthp,
- this->cds_startpos,this->truncatep,this->strictp,
- this->maponlyp);
- }
- }
-
- } else if ((chimera = Result_chimera(result)) != NULL) {
- if (this->chimeras_allowed_p == true) {
- effective_maxpaths = 2;
- } else {
- effective_maxpaths = 0;
- }
- fp = this->fp_transloc;
- abbrev = ABBREV_UNPAIRED_TRANSLOC;
- if (this->failsonlyp == true) {
- printp = false;
+ if (nread == 0) {
+ /* Finished reading, so able to determine total reads in input */
+ this->ntotal = this->nread;
+ debug(fprintf(stderr,"__Outbuffer_add_nread added 0 reads, so setting ntotal to be %u\n",this->ntotal));
-#if 0
- } else if (this->quiet_if_excessive_p && npaths > this->maxpaths_report) {
- /* Counting a chimera as a single path */
- printp = true;
+#ifdef HAVE_PTHREAD
+ pthread_cond_signal(&this->filestring_avail_p);
#endif
- } else {
- printp = true;
-
- chimerapos = Chimera_pos(chimera);
- chimeraequivpos = Chimera_equivpos(chimera);
- donor_prob = Chimera_donor_prob(chimera);
- acceptor_prob = Chimera_acceptor_prob(chimera);
- chimera_cdna_direction = Chimera_cdna_direction(chimera);
-
- Stage3_translate_chimera(stage3array[0],stage3array[1],
-#ifdef PMAP
- queryseq,this->diagnosticp,
+ } else {
+ this->nread += nread;
+#ifdef USE_MPI
+ this->ntotal = this->nread;
#endif
- querylength,this->fulllengthp,
- this->cds_startpos,this->truncatep,this->strictp,
- this->maponlyp);
- }
-
- } else if (this->maxpaths_report == 0) {
- effective_maxpaths = 1;
- if (npaths > 1) {
- fp = this->fp_mult;
- abbrev = ABBREV_UNPAIRED_MULT;
- } else if (Stage3_circularpos(stage3array[0]) > 0) {
- fp = this->fp_circular;
- abbrev = ABBREV_UNPAIRED_CIRCULAR;
- } else {
- fp = this->fp_uniq;
- abbrev = ABBREV_UNPAIRED_UNIQ;
- }
-
- if (this->failsonlyp == true) {
- printp = false;
- } else if (this->quiet_if_excessive_p && npaths > this->maxpaths_report) {
- printp = false;
- } else {
- printp = true;
+ debug(fprintf(stderr,"__Outbuffer_add_nread added %d read, now %d\n",nread,this->nread));
+ }
- Stage3_translate(stage3array[0],
-#ifdef PMAP
- queryseq,this->diagnosticp,
+#ifdef HAVE_PTHREAD
+ pthread_mutex_unlock(&this->lock);
#endif
- querylength,this->fulllengthp,
- this->cds_startpos,this->truncatep,this->strictp,
- this->maponlyp);
- }
- } else {
- if (npaths > 1) {
- fp = this->fp_mult;
- abbrev = ABBREV_UNPAIRED_MULT;
- } else if (Stage3_circularpos(stage3array[0]) > 0) {
- fp = this->fp_circular;
- abbrev = ABBREV_UNPAIRED_CIRCULAR;
- } else {
- fp = this->fp_uniq;
- abbrev = ABBREV_UNPAIRED_UNIQ;
- }
+ return;
+}
- if (npaths < this->maxpaths_report) {
- effective_maxpaths = npaths;
- } else {
- effective_maxpaths = this->maxpaths_report;
- }
- if (this->failsonlyp == true) {
- printp = false;
- } else if (this->quiet_if_excessive_p && npaths > this->maxpaths_report) {
- printp = false;
- } else {
- printp = true;
+void
+Outbuffer_add_nbeyond (T this) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_translate(stage3array[pathnum-1],
-#ifdef PMAP
- queryseq,this->diagnosticp,
+#ifdef HAVE_PTHREAD
+ pthread_mutex_lock(&this->lock);
#endif
- querylength,this->fulllengthp,
- this->cds_startpos,this->truncatep,this->strictp,
- this->maponlyp);
- }
- }
- }
- /* Printing */
- if (this->debug_graphic_p == true) {
- printf("q()\n");
+ this->nbeyond += 1;
- } else if (printp == false) {
- /* No output, either because of --nofails or --quiet-if-excessive */
+#ifdef HAVE_PTHREAD
+ pthread_cond_signal(&this->filestring_avail_p);
+ pthread_mutex_unlock(&this->lock);
+#endif
- } else {
- if (this->failedinput_root != NULL &&
- (npaths == 0 || (this->quiet_if_excessive_p && npaths > this->maxpaths_report))) {
- putc('>',this->fp_failedinput_1);
- Sequence_print_header(this->fp_failedinput_1,headerseq,this->checksump);
- Sequence_print(this->fp_failedinput_1,queryseq,/*uppercasep*/false,this->wraplength,/*trimmedp*/false);
- }
+ return;
+}
- if (this->printtype == SIMPLE || this->printtype == SUMMARY || this->printtype == ALIGNMENT) {
- /* Print header, even if no alignment is found */
- putc('>',fp);
- Sequence_print_header(fp,headerseq,this->checksump);
- diagnostic = Result_diagnostic(result);
- if (npaths == 0) {
- print_npaths(this,fp,0,diagnostic,this->chrsubset_name,
- /*mergedp*/false,/*chimera*/NULL,Result_failuretype(result));
+#ifdef GSNAP
+void
+Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) {
+#ifdef HAVE_PTHREAD
+ pthread_mutex_lock(&this->lock);
+#endif
- } else {
- print_npaths(this,fp,npaths,diagnostic,this->chrsubset_name,mergedp,chimera,NO_FAILURE);
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_print_pathsummary(fp,stage3array[pathnum-1],pathnum,
- this->chromosome_iit,this->contig_iit,
- this->altstrain_iit,queryseq,
- this->dbversion,/*maxmutations*/1000000,
- this->diagnosticp,this->maponlyp);
- }
- }
+ this->tail = RRlist_push(&this->head,this->tail,fp,fp_failedinput_1,fp_failedinput_2);
+ debug1(RRlist_dump(this->head,this->tail));
+ this->nprocessed += 1;
- if (this->printtype != SIMPLE) {
- fprintf(fp,"Alignments:\n");
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- fprintf(fp," Alignment for path %d:\n\n",pathnum);
- Stage3_print_alignment(fp,stage3array[pathnum-1],
- this->genome,this->chromosome_iit,this->printtype,
- /*continuousp*/false,/*continuous_by_exon_p*/false,
- this->diagnosticp,/*flipgenomep*/true,
- this->invertmode,this->nointronlenp,
- this->wraplength);
- }
- }
+#ifdef HAVE_PTHREAD
+ debug(printf("Signaling that filestring is available\n"));
+ pthread_cond_signal(&this->filestring_avail_p);
+ pthread_mutex_unlock(&this->lock);
+#endif
- if (this->map_iit != NULL) {
- fprintf(fp,"Maps:\n");
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_print_map(fp,stage3array[pathnum-1],this->map_iit,this->map_divint_crosstable,
- this->chromosome_iit,pathnum,this->map_exons_p,this->map_bothstrands_p,
- this->nflanking,this->print_comment_p);
- }
- }
+ return;
+}
- } else if (this->printtype == COMPRESSED) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_print_compressed(fp,stage3array[pathnum-1],queryseq,this->chromosome_iit,
- this->dbversion,this->usersegment,pathnum,npaths,
- this->checksump,chimerapos,chimeraequivpos,
- donor_prob,acceptor_prob,chimera_cdna_direction);
- }
+#else
+void
+Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput) {
- } else if (this->printtype == CONTINUOUS) {
- putc('>',fp);
- Sequence_print_header(fp,headerseq,this->checksump);
- if (npaths == 0) {
- fprintf(fp,"\n\n\n");
- } else {
- Stage3_print_alignment(fp,stage3array[0],this->genome,this->chromosome_iit,this->printtype,
- /*continuousp*/true,/*continuous_by_exon_p*/false,
- this->diagnosticp,/*flipgenomep*/true,
- this->invertmode,this->nointronlenp,
- this->wraplength);
- }
+#ifdef HAVE_PTHREAD
+ pthread_mutex_lock(&this->lock);
+#endif
- } else if (this->printtype == CONTINUOUS_BY_EXON) {
- diagnostic = Result_diagnostic(result);
+ this->tail = RRlist_push(&this->head,this->tail,fp,fp_failedinput);
+ debug1(RRlist_dump(this->head,this->tail));
+ this->nprocessed += 1;
- putc('>',fp);
- Sequence_print_header(fp,headerseq,this->checksump);
- print_npaths(this,fp,npaths,diagnostic,this->chrsubset_name,mergedp,chimera,NO_FAILURE);
- if (npaths == 0) {
- fprintf(fp,"\n\n\n");
- } else {
- Stage3_print_pathsummary(fp,stage3array[0],/*pathnum*/1,
- this->chromosome_iit,this->contig_iit,
- this->altstrain_iit,queryseq,
- this->dbversion,/*maxmutations*/1000000,
- this->diagnosticp,this->maponlyp);
- fprintf(fp,"Alignments:\n");
- fprintf(fp," Alignment for path %d:\n\n",/*pathnum*/1);
- Stage3_print_alignment(fp,stage3array[0],this->genome,this->chromosome_iit,this->printtype,
- /*continuousp*/false,/*continuous_by_exon_p*/true,
- this->diagnosticp,/*flipgenomep*/true,
- this->invertmode,this->nointronlenp,
- this->wraplength);
- }
+#ifdef HAVE_PTHREAD
+ debug(printf("Signaling that filestring is available\n"));
+ pthread_cond_signal(&this->filestring_avail_p);
+ pthread_mutex_unlock(&this->lock);
+#endif
- } else if (this->printtype == EXONS_CDNA) {
- putc('>',fp);
- Sequence_print_header(fp,headerseq,this->checksump);
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- fprintf(fp,"<path %d>\n",pathnum);
- Pair_print_exons(fp,Stage3_pairarray(stage3array[0]),Stage3_npairs(stage3array[0]),
- this->wraplength,this->ngap,/*cdna*/true);
- fprintf(fp,"</path>\n");
- }
+ return;
+}
+#endif
- } else if (this->printtype == EXONS_GENOMIC) {
- putc('>',fp);
- Sequence_print_header(fp,headerseq,this->checksump);
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- fprintf(fp,"<path %d>\n",pathnum);
- Pair_print_exons(fp,Stage3_pairarray(stage3array[0]),Stage3_npairs(stage3array[0]),
- this->wraplength,this->ngap,/*cdna*/false);
- fprintf(fp,"</path>\n");
- }
- } else if (this->printtype == CDNA) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- putc('>',fp);
- Sequence_print_header(fp,headerseq,this->checksump);
- Stage3_print_cdna(fp,stage3array[pathnum-1],this->wraplength);
- }
- } else if (this->printtype == PROTEIN_GENOMIC) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- putc('>',fp);
- Sequence_print_header(fp,headerseq,this->checksump);
- Stage3_print_protein_genomic(fp,stage3array[pathnum-1],this->wraplength);
- }
+#ifdef GSNAP
+void
+Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) {
+ SAM_split_output_type split_output;
+#ifdef USE_MPI
+ MPI_File output;
+#else
+ FILE *output;
+#endif
- } else if (this->printtype == PSL_NT) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_print_pslformat_nt(fp,stage3array[pathnum-1],
- this->chromosome_iit,this->usersegment,queryseq);
- }
+#ifdef USE_MPI
+ split_output = Filestring_split_output(fp);
+ output = outputs[split_output];
-#ifdef PMAP
- } else if (this->printtype == PSL_PRO) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_print_pslformat_pro(fp,stage3array[pathnum-1],
- this->chromosome_iit,this->usersegment,queryseq,this->strictp);
+#else
+ if (split_output_root != NULL) {
+ split_output = Filestring_split_output(fp);
+ if ((output = outputs[split_output]) == NULL) {
+ output = outputs[split_output] = SAM_header_open_file(split_output,split_output_root,appendp);
+ if (split_output == OUTPUT_NONE && split_output_root != NULL) {
+ /* Don't print file headers, since no output will go to
+ stdout. Must be a nomapping when --nofails is specified */
+ } else {
+ print_file_headers(output);
}
+ }
+ } else if ((output = outputs[0]) == NULL) {
+ if (output_file == NULL) {
+ output = outputs[0] = stdout;
+ print_file_headers(stdout);
+ } else {
+ output = outputs[0] = SAM_header_open_file(/*split_output*/OUTPUT_NONE,output_file,appendp);
+ print_file_headers(output);
+ }
+ }
#endif
- } else if (this->printtype == GFF3_GENE || this->printtype == GFF3_MATCH_CDNA ||
- this->printtype == GFF3_MATCH_EST) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_print_gff3(fp,stage3array[pathnum-1],pathnum,
- this->chromosome_iit,this->usersegment,queryseq,querylength,this->printtype,
- /*sourcename*/this->usersegment ? this->user_genomicseg : this->dbversion);
- }
-
-#ifndef PMAP
- } else if (this->printtype == SAM) {
- if (npaths == 0) {
- Pair_print_sam_nomapping(fp,abbrev,/*acc1*/Sequence_accession(headerseq),/*acc2*/NULL,
- Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
- Sequence_fulllength(queryseq),this->quality_shift,
- Sequence_firstp(queryseq),this->sam_paired_p,this->sam_read_group_id);
-
- } else if (this->quiet_if_excessive_p && npaths > this->maxpaths_report) {
- Pair_print_sam_nomapping(fp,abbrev,/*acc1*/Sequence_accession(headerseq),/*acc2*/NULL,
- Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
- Sequence_fulllength(queryseq),this->quality_shift,
- Sequence_firstp(queryseq),this->sam_paired_p,this->sam_read_group_id);
-
- } else if (mergedp == true) {
- Stage3_print_sam(fp,abbrev,stage3array[0],/*pathnum*/1,/*npaths*/1,
- Stage3_absmq_score(stage3array[0]),first_absmq,second_absmq,
- Stage3_mapq_score(stage3array[0]),
- this->chromosome_iit,this->usersegment,queryseq,
- /*chimera_part*/0,/*chimera*/NULL,this->quality_shift,this->sam_paired_p,
- this->sam_read_group_id);
-
- } else if (chimera != NULL) {
- Stage3_print_sam(fp,abbrev,stage3array[0],/*pathnum*/1,npaths,
- Stage3_absmq_score(stage3array[0]),first_absmq,second_absmq,
- Stage3_mapq_score(stage3array[0]),
- this->chromosome_iit,this->usersegment,queryseq,
- /*chimera_part*/-1,chimera,this->quality_shift,this->sam_paired_p,
- this->sam_read_group_id);
- Stage3_print_sam(fp,abbrev,stage3array[1],/*pathnum*/1,npaths,
- Stage3_absmq_score(stage3array[0]),first_absmq,second_absmq,
- Stage3_mapq_score(stage3array[0]),
- this->chromosome_iit,this->usersegment,queryseq,
- /*chimera_part*/+1,chimera,this->quality_shift,this->sam_paired_p,
- this->sam_read_group_id);
+#ifdef USE_MPI
+ /* Prevents output from being broken up */
+ Filestring_stringify(fp);
+#endif
+ Filestring_print(output,fp);
+ Filestring_free(&fp);
- } else {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_print_sam(fp,abbrev,stage3array[pathnum-1],pathnum,npaths,
- Stage3_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq,
- Stage3_mapq_score(stage3array[pathnum-1]),
- this->chromosome_iit,this->usersegment,queryseq,
- /*chimera_part*/0,/*chimera*/NULL,this->quality_shift,this->sam_paired_p,
- this->sam_read_group_id);
- }
- }
+ if (failedinput_root != NULL) {
+ if (fp_failedinput_1 != NULL) {
+#ifdef USE_MPI
+ Filestring_stringify(fp_failedinput_1);
+#endif
+ Filestring_print(output_failedinput_1,fp_failedinput_1);
+ Filestring_free(&fp_failedinput_1);
+ }
+ if (fp_failedinput_2 != NULL) {
+#ifdef USE_MPI
+ Filestring_stringify(fp_failedinput_2);
#endif
+ Filestring_print(output_failedinput_2,fp_failedinput_2);
+ Filestring_free(&fp_failedinput_2);
+ }
+ }
- } else if (this->printtype == COORDS) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- fprintf(fp,">");
- Sequence_print_header(fp,headerseq,this->checksump);
- Stage3_print_coordinates(fp,stage3array[pathnum-1],this->chromosome_iit,this->invertmode);
- }
+ return;
+}
- } else if (this->printtype == SPLICESITES) {
- /* Print only best path */
- if (npaths > 0) {
- Stage3_print_splicesites(fp,stage3array[0],this->chromosome_iit,queryseq);
- }
+#else
+void
+Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput) {
+ SAM_split_output_type split_output;
+#ifdef USE_MPI
+ MPI_File output;
+#else
+ FILE *output;
+#endif
- } else if (this->printtype == INTRONS) {
- /* Print only best path */
- if (npaths > 0) {
- Stage3_print_introns(fp,stage3array[0],this->chromosome_iit,queryseq);
- }
+#ifdef USE_MPI
+ split_output = Filestring_split_output(fp);
+ output = outputs[split_output];
- } else if (this->printtype == MAP_RANGES) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_print_iit_map(fp,stage3array[pathnum-1],this->chromosome_iit,queryseq);
- }
-
- } else if (this->printtype == MAP_EXONS) {
- for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
- Stage3_print_iit_exon_map(fp,stage3array[pathnum-1],this->chromosome_iit,queryseq);
+#else
+ if (split_output_root != NULL) {
+ split_output = Filestring_split_output(fp);
+ if ((output = outputs[split_output]) == NULL) {
+ output = outputs[split_output] = SAM_header_open_file(split_output,split_output_root,appendp);
+ if (split_output == OUTPUT_NONE && split_output_root != NULL) {
+ /* Don't print file headers, since no output will go to
+ stdout. Must be a nomapping when --nofails is specified */
+ } else {
+ print_file_headers(output);
}
+ }
+ } else if ((output = outputs[0]) == NULL) {
+ if (output_file == NULL) {
+ output = outputs[0] = stdout;
+ print_file_headers(stdout);
} else {
- fprintf(stderr,"Unexpected printtype %d\n",this->printtype);
- abort();
-
+ output = outputs[0] = SAM_header_open_file(/*split_output*/OUTPUT_NONE,output_file,appendp);
+ print_file_headers(output);
}
}
+#endif
+
+#ifdef USE_MPI
+ Filestring_stringify(fp);
+#endif
+ Filestring_print(output,fp);
+ Filestring_free(&fp);
-#ifdef MEMUSAGE
- comma0 = Genomicpos_commafmt(Mem_usage_report_std_stack());
- comma1 = Genomicpos_commafmt(Mem_usage_report_std_stack_max());
- comma2 = Genomicpos_commafmt(Mem_usage_report_std_heap());
- comma3 = Genomicpos_commafmt(Mem_usage_report_std_heap_max());
- printf("Stack: %s (max %s). Heap: %s (max %s).\n",comma0,comma1,comma2,comma3);
- FREE(comma3);
- FREE(comma2);
- FREE(comma1);
- FREE(comma0);
+ if (failedinput_root != NULL) {
+ if (fp_failedinput != NULL) {
+#ifdef USE_MPI
+ Filestring_stringify(fp_failedinput);
#endif
+ Filestring_print(output_failedinput,fp_failedinput);
+ Filestring_free(&fp_failedinput);
+ }
+ }
return;
}
@@ -2522,33 +1006,35 @@ Outbuffer_print_result (T this, Result_T result, Request_T request, Sequence_T h
#endif
+
void *
Outbuffer_thread_anyorder (void *data) {
T this = (T) data;
unsigned int output_buffer_size = this->output_buffer_size;
- unsigned int noutput = 0, ntotal;
- Result_T result;
- Request_T request;
-
-#ifdef MEMUSAGE
- Mem_usage_set_threadname("outbuffer");
+ unsigned int noutput = 0, ntotal, nbeyond;
+ Filestring_T fp;
+#ifdef GSNAP
+ Filestring_T fp_failedinput_1, fp_failedinput_2;
+#else
+ Filestring_T fp_failedinput;
#endif
-
+
/* Obtain this->ntotal while locked, to prevent race between output thread and input thread */
#ifdef HAVE_PTHREAD
pthread_mutex_lock(&this->lock);
#endif
ntotal = this->ntotal;
+ nbeyond = this->nbeyond;
#ifdef HAVE_PTHREAD
pthread_mutex_unlock(&this->lock);
#endif
- while (noutput < ntotal) { /* Previously check against this->ntotal */
+ while (noutput + nbeyond < ntotal) { /* Previously check against this->ntotal */
#ifdef HAVE_PTHREAD
pthread_mutex_lock(&this->lock);
- while (this->head == NULL && noutput < this->ntotal) {
- debug(fprintf(stderr,"__outbuffer_thread_anyorder waiting for result_avail_p\n"));
- pthread_cond_wait(&this->result_avail_p,&this->lock);
+ while (this->head == NULL && noutput + this->nbeyond < this->ntotal) {
+ debug(fprintf(stderr,"__outbuffer_thread_anyorder waiting for filestring_avail_p\n"));
+ pthread_cond_wait(&this->filestring_avail_p,&this->lock);
}
debug(fprintf(stderr,"__outbuffer_thread_anyorder woke up\n"));
#endif
@@ -2560,29 +1046,26 @@ Outbuffer_thread_anyorder (void *data) {
#endif
} else {
- this->head = RRlist_pop(this->head,&request,&result);
+#ifdef GSNAP
+ this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2);
+#else
+ this->head = RRlist_pop(this->head,&fp,&fp_failedinput);
+#endif
debug1(RRlist_dump(this->head,this->tail));
#ifdef HAVE_PTHREAD
- /* Let worker threads put results while we print */
+ /* Let worker threads put filestrings while we print */
pthread_mutex_unlock(&this->lock);
#endif
-#ifdef MEMUSAGE
- Outbuffer_print_result(this,result,request,
-#ifndef GSNAP
- Request_queryseq(request),
-#endif
- noutput+1);
+
+#ifdef GSNAP
+ Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
#else
- Outbuffer_print_result(this,result,request
-#ifndef GSNAP
- ,Request_queryseq(request)
+ Outbuffer_print_filestrings(fp,fp_failedinput);
#endif
- );
-#endif
- Result_free(&result);
- Request_free(&request);
- noutput++;
+ noutput += 1;
+ /* Result_free(&result); */
+ /* Request_free(&request); */
#ifdef HAVE_PTHREAD
pthread_mutex_lock(&this->lock);
@@ -2590,38 +1073,37 @@ Outbuffer_thread_anyorder (void *data) {
if (this->head && this->nprocessed - noutput > output_buffer_size) {
/* Clear out backlog */
while (this->head && this->nprocessed - noutput > output_buffer_size) {
- this->head = RRlist_pop(this->head,&request,&result);
+#ifdef GSNAP
+ this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2);
+#else
+ this->head = RRlist_pop(this->head,&fp,&fp_failedinput);
+#endif
debug1(RRlist_dump(this->head,this->tail));
-#ifdef MEMUSAGE
- Outbuffer_print_result(this,result,request,
-#ifndef GSNAP
- Request_queryseq(request),
-#endif
- noutput+1);
+#ifdef GSNAP
+ Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
#else
- Outbuffer_print_result(this,result,request
-#ifndef GSNAP
- ,Request_queryseq(request)
+ Outbuffer_print_filestrings(fp,fp_failedinput);
#endif
- );
-#endif
- Result_free(&result);
- Request_free(&request);
- noutput++;
+ noutput += 1;
+ /* Result_free(&result); */
+ /* Request_free(&request); */
}
}
#ifdef HAVE_PTHREAD
pthread_mutex_unlock(&this->lock);
#endif
-
}
+ debug(fprintf(stderr,"__outbuffer_thread_anyorder has noutput %d, nbeyond %d, ntotal %d\n",
+ noutput,nbeyond,ntotal));
+
/* Obtain this->ntotal while locked, to prevent race between output thread and input thread */
#ifdef HAVE_PTHREAD
pthread_mutex_lock(&this->lock);
#endif
ntotal = this->ntotal;
+ nbeyond = this->nbeyond;
#ifdef HAVE_PTHREAD
pthread_mutex_unlock(&this->lock);
#endif
@@ -2638,89 +1120,90 @@ void *
Outbuffer_thread_ordered (void *data) {
T this = (T) data;
unsigned int output_buffer_size = this->output_buffer_size;
- unsigned int noutput = 0, nqueued = 0, ntotal;
- Result_T result;
- Request_T request;
+ unsigned int noutput = 0, nqueued = 0, ntotal, nbeyond;
+ Filestring_T fp;
+#ifdef GSNAP
+ Filestring_T fp_failedinput_1, fp_failedinput_2;
+#else
+ Filestring_T fp_failedinput;
+#endif
RRlist_T queue = NULL;
int id;
-#ifdef MEMUSAGE
- Mem_usage_set_threadname("outbuffer");
-#endif
-
/* Obtain this->ntotal while locked, to prevent race between output thread and input thread */
#ifdef HAVE_PTHREAD
pthread_mutex_lock(&this->lock);
#endif
ntotal = this->ntotal;
+ nbeyond = this->nbeyond;
#ifdef HAVE_PTHREAD
pthread_mutex_unlock(&this->lock);
#endif
- while (noutput < ntotal) { /* Previously checked against this->ntotal */
+ while (noutput + nbeyond < ntotal) { /* Previously checked against this->ntotal */
#ifdef HAVE_PTHREAD
pthread_mutex_lock(&this->lock);
- while (this->head == NULL && noutput < this->ntotal) {
- pthread_cond_wait(&this->result_avail_p,&this->lock);
+ while (this->head == NULL && noutput + this->nbeyond < this->ntotal) {
+ pthread_cond_wait(&this->filestring_avail_p,&this->lock);
}
debug(fprintf(stderr,"__outbuffer_thread_ordered woke up\n"));
#endif
if (this->head == NULL) {
#ifdef HAVE_PTHREAD
- /* False wake up */
+ /* False wake up, or signal from worker_mpi_process */
+ ntotal = this->ntotal;
+ nbeyond = this->nbeyond;
pthread_mutex_unlock(&this->lock);
#endif
} else {
- this->head = RRlist_pop(this->head,&request,&result);
+#ifdef GSNAP
+ this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2);
+#else
+ this->head = RRlist_pop(this->head,&fp,&fp_failedinput);
+#endif
#ifdef HAVE_PTHREAD
/* Allow workers access to the queue */
pthread_mutex_unlock(&this->lock);
#endif
- if ((id = Result_id(result)) != (int) noutput) {
+ if ((id = Filestring_id(fp)) != (int) noutput) {
/* Store in queue */
- queue = RRlist_insert(queue,id,request,result);
+#ifdef GSNAP
+ queue = RRlist_insert(queue,id,fp,fp_failedinput_1,fp_failedinput_2);
+#else
+ queue = RRlist_insert(queue,id,fp,fp_failedinput);
+#endif
nqueued++;
} else {
-#ifdef MEMUSAGE
- Outbuffer_print_result(this,result,request,
-#ifndef GSNAP
- Request_queryseq(request),
-#endif
- noutput+1);
+#ifdef GSNAP
+ Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
#else
- Outbuffer_print_result(this,result,request
-#ifndef GSNAP
- ,Request_queryseq(request)
-#endif
- );
+ Outbuffer_print_filestrings(fp,fp_failedinput);
#endif
- Result_free(&result);
- Request_free(&request);
- noutput++;
+ noutput += 1;
+
+ /* Result_free(&result); */
+ /* Request_free(&request); */
/* Print out rest of stored queue */
while (queue != NULL && queue->id == (int) noutput) {
- queue = RRlist_pop_id(queue,&id,&request,&result);
- nqueued--;
-#ifdef MEMUSAGE
- Outbuffer_print_result(this,result,request,
-#ifndef GSNAP
- Request_queryseq(request),
-#endif
- noutput+1);
+#ifdef GSNAP
+ queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput_1,&fp_failedinput_2);
#else
- Outbuffer_print_result(this,result,request
-#ifndef GSNAP
- ,Request_queryseq(request)
+ queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput);
#endif
- );
+ nqueued--;
+#ifdef GSNAP
+ Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
+#else
+ Outbuffer_print_filestrings(fp,fp_failedinput);
#endif
- Result_free(&result);
- Request_free(&request);
- noutput++;
+ noutput += 1;
+
+ /* Result_free(&result); */
+ /* Request_free(&request); */
}
}
@@ -2730,64 +1213,64 @@ Outbuffer_thread_ordered (void *data) {
if (this->head && this->nprocessed - nqueued - noutput > output_buffer_size) {
/* Clear out backlog */
while (this->head && this->nprocessed - nqueued - noutput > output_buffer_size) {
- this->head = RRlist_pop(this->head,&request,&result);
- if ((id = Result_id(result)) != (int) noutput) {
+#ifdef GSNAP
+ this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2);
+#else
+ this->head = RRlist_pop(this->head,&fp,&fp_failedinput);
+#endif
+ if ((id = Filestring_id(fp)) != (int) noutput) {
/* Store in queue */
- queue = RRlist_insert(queue,id,request,result);
+#ifdef GSNAP
+ queue = RRlist_insert(queue,id,fp,fp_failedinput_1,fp_failedinput_2);
+#else
+ queue = RRlist_insert(queue,id,fp,fp_failedinput);
+#endif
nqueued++;
} else {
-#ifdef MEMUSAGE
- Outbuffer_print_result(this,result,request,
-#ifndef GSNAP
- Request_queryseq(request),
-#endif
- noutput+1);
+#ifdef GSNAP
+ Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
#else
- Outbuffer_print_result(this,result,request
-#ifndef GSNAP
- ,Request_queryseq(request)
-#endif
- );
+ Outbuffer_print_filestrings(fp,fp_failedinput);
#endif
- Result_free(&result);
- Request_free(&request);
- noutput++;
+ noutput += 1;
+ /* Result_free(&result); */
+ /* Request_free(&request); */
/* Print out rest of stored queue */
while (queue != NULL && queue->id == (int) noutput) {
- queue = RRlist_pop_id(queue,&id,&request,&result);
- nqueued--;
-#ifdef MEMUSAGE
- Outbuffer_print_result(this,result,request,
-#ifndef GSNAP
- Request_queryseq(request),
-#endif
- noutput+1);
+#ifdef GSNAP
+ queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput_1,&fp_failedinput_2);
#else
- Outbuffer_print_result(this,result,request
-#ifndef GSNAP
- ,Request_queryseq(request)
+ queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput);
#endif
- );
+ nqueued--;
+#ifdef GSNAP
+ Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
+#else
+ Outbuffer_print_filestrings(fp,fp_failedinput);
#endif
- Result_free(&result);
- Request_free(&request);
- noutput++;
+ noutput += 1;
+ /* Result_free(&result); */
+ /* Request_free(&request); */
}
}
}
}
+
#ifdef HAVE_PTHREAD
pthread_mutex_unlock(&this->lock);
#endif
-
}
+ debug(fprintf(stderr,"__outbuffer_thread_ordered has noutput %d, nbeyond %d, ntotal %d\n",
+ noutput,nbeyond,ntotal));
+
/* Obtain this->ntotal while locked, to prevent race between output thread and input thread */
#ifdef HAVE_PTHREAD
pthread_mutex_lock(&this->lock);
#endif
ntotal = this->ntotal;
+ nbeyond = this->nbeyond;
#ifdef HAVE_PTHREAD
pthread_mutex_unlock(&this->lock);
#endif
@@ -2798,4 +1281,3 @@ Outbuffer_thread_ordered (void *data) {
return (void *) NULL;
}
-
diff --git a/src/outbuffer.h b/src/outbuffer.h
index 272b370..7c0d318 100644
--- a/src/outbuffer.h
+++ b/src/outbuffer.h
@@ -1,4 +1,4 @@
-/* $Id: outbuffer.h 149319 2014-09-30 02:15:42Z twu $ */
+/* $Id: outbuffer.h 157571 2015-01-28 00:04:37Z twu $ */
#ifndef OUTBUFFER_INCLUDED
#define OUTBUFFER_INCLUDED
@@ -8,17 +8,17 @@
#include "sequence.h"
#include "iit-read-univ.h"
#include "iit-read.h"
+#include "samflags.h"
+#include "filestring.h"
#include "request.h"
#include "mem.h" /* To get MEMUSAGE */
#ifdef GSNAP
-#include "goby.h"
#include "resulthr.h"
#else
#include "stage3.h" /* Has Printtype_T */
-#include "result.h"
#include "genome.h"
#endif
@@ -27,42 +27,27 @@
#define T Outbuffer_T
typedef struct T *T;
+extern void
+Outbuffer_setup (int argc_in, char **argv_in, int optind_in,
+ Univ_IIT_T chromosome_iit_in, bool any_circular_p_in,
+ int nworkers_in, bool orderedp_in, bool quiet_if_excessive_p_in,
#ifdef GSNAP
-
-extern T
-Outbuffer_new (unsigned int output_buffer_size, unsigned int nread, char *sevenway_root, char *failedinput_root,
- bool appendp, Univ_IIT_T chromosome_iit, bool timingp,
- bool output_sam_p, bool sam_headers_p, char *sam_read_group_id, char *sam_read_group_name,
- char *sam_read_group_library, char *sam_read_group_platform,
- int nworkers, bool orderedp, Gobywriter_T gobywriter, bool nofailsp, bool failsonlyp,
- bool fastq_format_p, bool clip_overlap_p, bool merge_overlap_p, bool merge_samechr_p, bool print_m8_p,
- int maxpaths_report, bool quiet_if_excessive_p, int quality_shift,
- bool invert_first_p, bool invert_second_p, Chrpos_T pairmax,
- int argc, char **argv, int optind);
-
+ bool output_sam_p_in,
#else
+ Printtype_T printtype_in, Sequence_T usersegment_in,
+#endif
+ bool sam_headers_p_in, char *sam_read_group_id_in, char *sam_read_group_name_in,
+ char *sam_read_group_library_in, char *sam_read_group_platform_in,
+ bool appendp_in, char *output_file_in, char *split_output_root_in, char *failedinput_root_in);
+
+extern void
+Outbuffer_cleanup ();
extern T
-Outbuffer_new (unsigned int output_buffer_size, unsigned int nread, char *sevenway_root, char *failedinput_root,
- bool appendp, bool chimeras_allowed_p, char *user_genomicseg, Sequence_T usersegment,
- char *dbversion, Genome_T genome, Univ_IIT_T chromosome_iit, char *chrsubset_name,
- Univ_IIT_T contig_iit, IIT_T altstrain_iit, IIT_T map_iit,
- int *map_divint_crosstable, Printtype_T printtype, bool checksump, int chimera_margin,
-#ifndef PMAP
- bool sam_headers_p, int quality_shift, bool sam_paired_p,
- char *sam_read_group_id, char *sam_read_group_name,
- char *sam_read_group_library, char *sam_read_group_platform,
- int nworkers, bool orderedp,
-#endif
- bool nofailsp, bool failsonlyp, int maxpaths_report, bool quiet_if_excessive_p,
- bool map_exons_p, bool map_bothstrands_p, bool print_comment_p, int nflanking,
- int proteinmode, int invertmode, bool nointronlenp, int wraplength,
- int ngap, int cds_startpos,
- bool fulllengthp, bool truncatep, bool strictp, bool diagnosticp, bool maponlyp,
- bool stage1debug, bool diag_debug, bool debug_graphic_p,
- int argc, char **argv, int optind);
+Outbuffer_new (unsigned int output_buffer_size, unsigned int nread);
-#endif
+extern void
+Outbuffer_close_files ();
extern void
Outbuffer_free (T *old);
@@ -70,26 +55,24 @@ Outbuffer_free (T *old);
extern unsigned int
Outbuffer_nread (T this);
+extern unsigned int
+Outbuffer_nbeyond (T this);
+
extern void
Outbuffer_add_nread (T this, unsigned int nread);
+#ifdef GSNAP
extern void
-Outbuffer_put_result (T this, Result_T result, Request_T request);
+Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2);
-#ifdef GSNAP
extern void
-Outbuffer_print_result (T this, Result_T result, Request_T request
-#ifdef MEMUSAGE
- , unsigned int noutput
-#endif
- );
+Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2);
#else
extern void
-Outbuffer_print_result (T this, Result_T result, Request_T request, Sequence_T headerseq
-#ifdef MEMUSAGE
- , unsigned int noutput
-#endif
- );
+Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput);
+
+extern void
+Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput_1);
#endif
extern void *
@@ -98,6 +81,12 @@ Outbuffer_thread_anyorder (void *data);
extern void *
Outbuffer_thread_ordered (void *data);
+#ifdef USE_MPI
+extern void
+Outbuffer_mpi_process (T this, int n_worker_procs, int part_modulus, int part_interval);
+#endif
+
+
#undef T
#endif
diff --git a/src/output.c b/src/output.c
new file mode 100644
index 0000000..46fd3b4
--- /dev/null
+++ b/src/output.c
@@ -0,0 +1,972 @@
+static char rcsid[] = "$Id: output.c 166468 2015-05-28 02:39:14Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "output.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sequence.h"
+
+#ifdef GSNAP
+#include "shortread.h"
+#include "samprint.h"
+#include "stage3hr.h"
+#endif
+
+#include "samheader.h"
+#include "samflags.h" /* For output types */
+
+
+/* For GSNAP, now handling --failsonlyp in sam_sort. Still
+ handling quiet-if-excessive in GMAP/GSNAP, because that changes the
+ SAM line to a nomapping format. */
+
+static Univ_IIT_T chromosome_iit;
+static bool nofailsp;
+static bool failsonlyp;
+static bool quiet_if_excessive_p;
+static int maxpaths_report;
+static int quality_shift;
+
+#ifdef GSNAP
+static bool output_sam_p;
+static bool print_m8_p;
+static bool invert_first_p;
+static bool invert_second_p;
+
+static bool merge_samechr_p;
+
+#else
+static Printtype_T printtype;
+static int invertmode;
+static int wraplength;
+static int ngap;
+static bool nointronlenp;
+static bool sam_paired_p;
+
+static int cds_startpos;
+static bool fulllengthp;
+static bool truncatep;
+static bool strictp;
+static bool checksump;
+
+static Genome_T genome;
+static Sequence_T usersegment;
+static char *user_genomicseg;
+
+static char *dbversion;
+static char *chrsubset_name;
+static Univ_IIT_T contig_iit;
+static IIT_T altstrain_iit;
+static bool chimeras_allowed_p;
+
+static IIT_T map_iit;
+static int *map_divint_crosstable;
+static bool map_exons_p;
+static bool map_bothstrands_p;
+static int nflanking;
+static bool print_comment_p;
+#endif
+
+static char *failedinput_root;
+static char *sam_read_group_id;
+
+/* Output_setup: records the output options used by this file by copying
+   each *_in argument into the file-scope static of the same name.  The
+   middle of the parameter list depends on the build: with GSNAP defined it
+   takes the SAM/m8/inversion/merge flags, otherwise the GMAP-specific
+   print, translation and map settings.  Pointer arguments are stored as
+   given, not duplicated. */
+void
+Output_setup (Univ_IIT_T chromosome_iit_in,
+ bool nofailsp_in, bool failsonlyp_in, bool quiet_if_excessive_p_in, int maxpaths_report_in,
+ char *failedinput_root_in, int quality_shift_in,
+#ifdef GSNAP
+ bool output_sam_p_in, bool print_m8_p_in, bool invert_first_p_in, bool invert_second_p_in,
+ bool merge_samechr_p_in,
+#else
+ Printtype_T printtype_in, int invertmode_in, int wraplength_in, int ngap_in,
+ bool nointronlenp_in, bool sam_paired_p_in, int cds_startpos_in,
+ bool fulllengthp_in, bool truncatep_in, bool strictp_in, bool checksump_in,
+
+ Genome_T genome_in, Sequence_T usersegment_in, char *user_genomicseg_in,
+ char *dbversion_in, char *chrsubset_name_in,
+ Univ_IIT_T contig_iit_in, IIT_T altstrain_iit_in, bool chimeras_allowed_p_in,
+ IIT_T map_iit_in, int *map_divint_crosstable_in, bool map_exons_p_in,
+ bool map_bothstrands_p_in, int nflanking_in, bool print_comment_p_in,
+#endif
+ char *sam_read_group_id_in) {
+
+ chromosome_iit = chromosome_iit_in;
+
+ nofailsp = nofailsp_in;
+ failsonlyp = failsonlyp_in;
+ quiet_if_excessive_p = quiet_if_excessive_p_in;
+ maxpaths_report = maxpaths_report_in;
+ failedinput_root = failedinput_root_in;
+
+ quality_shift = quality_shift_in;
+
+#ifdef GSNAP
+ output_sam_p = output_sam_p_in;
+ print_m8_p = print_m8_p_in;
+ invert_first_p = invert_first_p_in;
+ invert_second_p = invert_second_p_in;
+
+ merge_samechr_p = merge_samechr_p_in;
+
+#else
+ printtype = printtype_in;
+ invertmode = invertmode_in;
+ wraplength = wraplength_in;
+ ngap = ngap_in;
+ nointronlenp = nointronlenp_in;
+ sam_paired_p = sam_paired_p_in;
+
+ cds_startpos = cds_startpos_in;
+ fulllengthp = fulllengthp_in;
+ truncatep = truncatep_in;
+ strictp = strictp_in;
+ checksump = checksump_in;
+
+ genome = genome_in;
+ usersegment = usersegment_in;
+ user_genomicseg = user_genomicseg_in;
+
+ dbversion = dbversion_in;
+ chrsubset_name = chrsubset_name_in;
+ contig_iit = contig_iit_in;
+ altstrain_iit = altstrain_iit_in;
+ chimeras_allowed_p = chimeras_allowed_p_in;
+
+ map_iit = map_iit_in;
+ map_divint_crosstable = map_divint_crosstable_in;
+ map_exons_p = map_exons_p_in;
+ map_bothstrands_p = map_bothstrands_p_in;
+ nflanking = nflanking_in;
+ print_comment_p = print_comment_p_in;
+#endif
+
+ sam_read_group_id = sam_read_group_id_in;
+
+ return;
+}
+
+
+#ifdef GSNAP
+/************************************************************************
+ * Print routines and threads for GSNAP
+ ************************************************************************/
+
+/* Taken from print_result_sam from old outbuffer.c.
+   Builds the SAM-format output for one request/result pair and returns it
+   as a new Filestring_T created with Request_id(request).
+   *fp_failedinput_1 receives a second new Filestring_T when
+   failedinput_root is set, otherwise NULL.  *fp_failedinput_2 is set to
+   NULL for the four single-end result types; for every other result type
+   it is allocated under the same failedinput_root rule and the printing is
+   handed to SAM_print_paired.  For the single-end types, nofailsp skips the
+   nomapping output and failsonlyp skips the mapped output. */
+static Filestring_T
+filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+ Result_T result, Request_T request) {
+ Filestring_T fp;
+ Resulttype_T resulttype;
+ Shortread_T queryseq1;
+ Stage3end_T *stage3array, stage3;
+ Chrpos_T chrpos;
+ int npaths, pathnum, first_absmq, second_absmq;
+ char *abbrev;
+
+ fp = Filestring_new(Request_id(request));
+ if (failedinput_root == NULL) {
+ *fp_failedinput_1 = (Filestring_T) NULL;
+ } else {
+ *fp_failedinput_1 = Filestring_new(Request_id(request));
+ }
+
+ resulttype = Result_resulttype(result);
+ if (resulttype == SINGLEEND_NOMAPPING) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ queryseq1 = Request_queryseq1(request);
+ if (nofailsp == true) {
+ /* Skip */
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_NM); /* Needs to go outside of nofailsp.  NOTE(review): as written this call is inside the else-branch, so it is skipped when nofailsp is true -- confirm intent */
+ SAM_print_nomapping(fp,ABBREV_NOMAPPING_1,
+ queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
+ /*acc2*/NULL,chromosome_iit,resulttype,
+ /*first_read_p*/true,/*npaths*/0,/*npaths_mate*/0,/*mate_chrpos*/0U,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ if (failedinput_root != NULL) {
+ Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
+ }
+ }
+
+ } else if (resulttype == SINGLEEND_UNIQ) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ if (failsonlyp == true) {
+ /* Skip */
+ } else {
+ queryseq1 = Request_queryseq1(request);
+
+ stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
+ stage3 = stage3array[0];
+ if (Stage3end_hittype(stage3) == SAMECHR_SPLICE || Stage3end_hittype(stage3) == TRANSLOC_SPLICE) {
+ chrpos = 0;
+ } else {
+ chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
+ /*first_read_p*/true);
+ }
+ if (Stage3end_circularpos(stage3) > 0) {
+ Filestring_set_split_output(fp,OUTPUT_UC);
+ abbrev = ABBREV_UNPAIRED_CIRCULAR;
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_UU);
+ abbrev = ABBREV_UNPAIRED_UNIQ;
+ }
+ SAM_print(fp,*fp_failedinput_1,abbrev,stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),/*acc2*/NULL,
+ /*pathnum*/1,npaths,Stage3end_absmq_score(stage3array[0]),first_absmq,second_absmq,
+ Stage3end_mapq_score(stage3array[0]),
+ chromosome_iit,queryseq1,/*queryseq2*/NULL,
+ /*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/0,quality_shift,
+ sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+ }
+
+ } else if (resulttype == SINGLEEND_TRANSLOC) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+
+ Filestring_set_split_output(fp,OUTPUT_UT);
+ stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
+ if (failsonlyp == true) {
+ /* Skip */
+ } else if (quiet_if_excessive_p && npaths > maxpaths_report) { /* Too many paths: print one nomapping-format line instead of the alignments */
+ queryseq1 = Request_queryseq1(request);
+ SAM_print_nomapping(fp,ABBREV_UNPAIRED_TRANSLOC,
+ queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
+ /*acc2*/NULL,chromosome_iit,resulttype,
+ /*first_read_p*/true,npaths,/*npaths_mate*/0,/*mate_chrpos*/0U,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ if (failedinput_root != NULL) {
+ Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
+ }
+
+ } else {
+ queryseq1 = Request_queryseq1(request);
+
+ for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths_report; pathnum++) {
+ stage3 = stage3array[pathnum-1];
+ if (Stage3end_hittype(stage3) == SAMECHR_SPLICE || Stage3end_hittype(stage3) == TRANSLOC_SPLICE) {
+ chrpos = 0;
+ } else {
+ chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
+ /*first_read_p*/true);
+ }
+ SAM_print(fp,*fp_failedinput_1,ABBREV_UNPAIRED_TRANSLOC,
+ stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
+ /*acc2*/NULL,pathnum,npaths,
+ Stage3end_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq,
+ Stage3end_mapq_score(stage3array[pathnum-1]),
+ chromosome_iit,queryseq1,/*queryseq2*/NULL,
+ /*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/0,quality_shift,
+ sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+ }
+ }
+
+ } else if (resulttype == SINGLEEND_MULT) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
+
+ if (failsonlyp == true) {
+ /* Skip */
+ } else if (quiet_if_excessive_p && npaths > maxpaths_report) { /* Too many paths: print one nomapping-format line instead of the alignments */
+ Filestring_set_split_output(fp,OUTPUT_UX);
+ queryseq1 = Request_queryseq1(request);
+ SAM_print_nomapping(fp,ABBREV_UNPAIRED_MULT_XS,
+ queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
+ /*acc2*/NULL,chromosome_iit,resulttype,
+ /*first_read_p*/true,npaths,/*npaths_mate*/0,/*mate_chrpos*/0U,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+ if (failedinput_root != NULL) {
+ Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
+ }
+
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_UM);
+ queryseq1 = Request_queryseq1(request);
+ for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths_report; pathnum++) {
+ stage3 = stage3array[pathnum-1];
+ if (Stage3end_hittype(stage3) == SAMECHR_SPLICE || Stage3end_hittype(stage3) == TRANSLOC_SPLICE) {
+ chrpos = 0;
+ } else {
+ chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
+ /*first_read_p*/true);
+ }
+ SAM_print(fp,*fp_failedinput_1,ABBREV_UNPAIRED_MULT,
+ stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
+ /*acc2*/NULL,pathnum,npaths,
+ Stage3end_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq,
+ Stage3end_mapq_score(stage3array[pathnum-1]),
+ chromosome_iit,queryseq1,/*queryseq2*/NULL,
+ /*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
+ /*clipdir*/0,/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+ resulttype,/*first_read_p*/true,/*npaths_mate*/0,quality_shift,
+ sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+ }
+ }
+
+ } else {
+ if (failedinput_root == NULL) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ } else {
+ *fp_failedinput_2 = Filestring_new(Request_id(request));
+ }
+ SAM_print_paired(fp,*fp_failedinput_1,*fp_failedinput_2,result,resulttype,chromosome_iit,
+ Request_queryseq1(request),Request_queryseq2(request),
+ invert_first_p,invert_second_p,nofailsp,failsonlyp,
+ merge_samechr_p,quality_shift,sam_read_group_id);
+ }
+
+ return fp;
+}
+
+/* print_header_singleend: writes the header line for a single-end read
+   into fp: '>' followed by the output of Shortread_print_oneline, a tab
+   and npaths, " (transloc)" when translocationp is true, a tab and the
+   quality string when the read has one, then a tab and the output of
+   Shortread_print_header.  Writes nothing at all when print_m8_p is
+   true. */
+static void
+print_header_singleend (Filestring_T fp, Request_T request, bool translocationp, int npaths) {
+ Shortread_T queryseq1;
+
+ if (print_m8_p == false) {
+ queryseq1 = Request_queryseq1(request);
+
+ FPRINTF(fp,">");
+ Shortread_print_oneline(fp,queryseq1);
+ FPRINTF(fp,"\t%d",npaths);
+ if (translocationp == true) {
+ FPRINTF(fp," (transloc)");
+ }
+
+ /* No sequence inversion on single-end reads */
+ if (Shortread_quality_string(queryseq1) != NULL) {
+ FPRINTF(fp,"\t");
+ Shortread_print_quality(fp,queryseq1,/*hardclip_low*/0,/*hardclip_high*/0,
+ quality_shift,/*show_chopped_p*/true);
+ }
+
+ FPRINTF(fp,"\t");
+ Shortread_print_header(fp,queryseq1,/*queryseq2*/NULL);
+ /* FPRINTF(fp,"\n"); -- included in header */
+ }
+
+ return;
+}
+
+
+/* Taken from print_result_gsnap from old outbuffer.c.
+   Builds the native GSNAP-format output for one request/result pair and
+   returns it as a new Filestring_T created with Request_id(request).
+   *fp_failedinput_1 receives a second new Filestring_T when
+   failedinput_root is set, otherwise NULL.  *fp_failedinput_2 is set to
+   NULL for the four single-end result types and is allocated under the
+   same failedinput_root rule for the paired-end types.  nofailsp skips
+   the nomapping output and failsonlyp skips the mapped output.  Single-end
+   results are printed with print_header_singleend and Stage3end_print;
+   paired-end results call Stage3pair_print_end once per end. */
+static Filestring_T
+filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+ Result_T result, Request_T request) {
+ Filestring_T fp;
+ Resulttype_T resulttype;
+ Shortread_T queryseq1, queryseq2;
+ Stage3end_T *stage3array, stage3;
+ int npaths, pathnum, first_absmq, second_absmq;
+
+ fp = Filestring_new(Request_id(request));
+ if (failedinput_root == NULL) {
+ *fp_failedinput_1 = (Filestring_T) NULL;
+ } else {
+ *fp_failedinput_1 = Filestring_new(Request_id(request));
+ }
+
+ resulttype = Result_resulttype(result);
+
+ if (resulttype == SINGLEEND_NOMAPPING) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ if (nofailsp == true) {
+ /* Skip */
+ } else if (print_m8_p) {
+ /* Skip */
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_NM);
+ print_header_singleend(fp,request,/*translocationp*/false,/*npaths*/0);
+ FPRINTF(fp,"\n");
+
+ if (failedinput_root != NULL) {
+ queryseq1 = Request_queryseq1(request);
+ Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
+ }
+ }
+
+ } else if (resulttype == SINGLEEND_UNIQ) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ if (failsonlyp == true) {
+ /* Skip */
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_UU);
+ queryseq1 = Request_queryseq1(request);
+
+ stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
+ stage3 = stage3array[0];
+
+ print_header_singleend(fp,request,/*translocationp*/false,/*npaths*/1);
+ Stage3end_print(fp,stage3,Stage3end_score(stage3),
+ chromosome_iit,queryseq1,/*headerseq*/queryseq1,/*acc_suffix*/"",
+ invert_first_p,/*hit5*/(Stage3end_T) NULL,/*hit3*/(Stage3end_T) NULL,
+ /*pairlength*/0,/*pairscore*/0,/*pairtype*/UNPAIRED,
+ Stage3end_mapq_score(stage3));
+ if (print_m8_p == false) {
+ FPRINTF(fp,"\n");
+ }
+ }
+
+ } else if (resulttype == SINGLEEND_TRANSLOC) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ Filestring_set_split_output(fp,OUTPUT_UT);
+
+ stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
+
+ if (failsonlyp == true) {
+ /* Skip */
+
+ } else if (quiet_if_excessive_p && npaths > maxpaths_report) {
+ print_header_singleend(fp,request,/*translocationp*/true,npaths);
+ FPRINTF(fp,"\n"); /* NOTE(review): unconditional, whereas the other single-end branches guard this newline with print_m8_p == false, and print_header_singleend writes nothing when print_m8_p is true -- confirm */
+
+ } else {
+ queryseq1 = Request_queryseq1(request);
+
+ print_header_singleend(fp,request,/*translocationp*/true,npaths);
+ for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths_report; pathnum++) {
+ stage3 = stage3array[pathnum-1];
+ Stage3end_print(fp,stage3,Stage3end_score(stage3),
+ chromosome_iit,queryseq1,/*headerseq*/queryseq1,/*acc_suffix*/"",
+ invert_first_p,/*hit5*/(Stage3end_T) NULL,/*hit3*/(Stage3end_T) NULL,
+ /*pairlength*/0,/*pairscore*/0,/*pairtype*/UNPAIRED,
+ Stage3end_mapq_score(stage3));
+ }
+ if (print_m8_p == false) {
+ FPRINTF(fp,"\n");
+ }
+ }
+
+ } else if (resulttype == SINGLEEND_MULT) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ stage3array = (Stage3end_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
+
+ if (failsonlyp == true) {
+ /* Skip */
+
+ } else if (quiet_if_excessive_p && npaths > maxpaths_report) {
+ Filestring_set_split_output(fp,OUTPUT_UX);
+ print_header_singleend(fp,request,/*translocationp*/false,npaths);
+ if (print_m8_p == false) {
+ FPRINTF(fp,"\n");
+ }
+
+ } else {
+ queryseq1 = Request_queryseq1(request);
+
+ Filestring_set_split_output(fp,OUTPUT_UM);
+ print_header_singleend(fp,request,/*translocationp*/false,npaths);
+ for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths_report; pathnum++) {
+ stage3 = stage3array[pathnum-1];
+ Stage3end_print(fp,stage3,Stage3end_score(stage3),
+ chromosome_iit,queryseq1,/*headerseq*/queryseq1,/*acc_suffix*/"",
+ invert_first_p,/*hit5*/(Stage3end_T) NULL,/*hit3*/(Stage3end_T) NULL,
+ /*pairlength*/0,/*pairscore*/0,/*pairtype*/UNPAIRED,
+ Stage3end_mapq_score(stage3));
+ }
+ if (print_m8_p == false) {
+ FPRINTF(fp,"\n");
+ }
+ }
+
+ } else if (resulttype == PAIREDEND_NOMAPPING) {
+ if (failedinput_root == NULL) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ } else {
+ *fp_failedinput_2 = Filestring_new(Request_id(request));
+ }
+
+ if (nofailsp == true) {
+ /* No output */
+
+ } else {
+ queryseq1 = Request_queryseq1(request);
+ queryseq2 = Request_queryseq2(request);
+ /* Stage3pair_print_end will call Filestring_set_split_output(), based on resulttype */
+
+ /* First end */
+ Stage3pair_print_end(fp,*fp_failedinput_1,result,resulttype,'>',/*firstp*/true,chromosome_iit,
+ /*queryseq*/queryseq1,/*headerseq1*/queryseq1,/*headerseq2*/queryseq2,
+ maxpaths_report,quiet_if_excessive_p,invert_first_p,quality_shift);
+
+ /* Second end */
+ Stage3pair_print_end(fp,*fp_failedinput_2,result,resulttype,'<',/*firstp*/false,chromosome_iit,
+ /*queryseq*/queryseq2,/*headerseq1*/queryseq1,/*headerseq2*/queryseq2,
+ maxpaths_report,quiet_if_excessive_p,invert_second_p,quality_shift);
+
+ if (failedinput_root != NULL) {
+ Shortread_print_query_pairedend(*fp_failedinput_1,*fp_failedinput_2,queryseq1,queryseq2);
+ }
+ }
+
+ } else {
+ if (failedinput_root == NULL) {
+ *fp_failedinput_2 = (Filestring_T) NULL;
+ } else {
+ *fp_failedinput_2 = Filestring_new(Request_id(request));
+ }
+
+ if (failsonlyp == true) {
+ /* Unwanted success: skip */
+
+ } else {
+ queryseq1 = Request_queryseq1(request);
+ queryseq2 = Request_queryseq2(request);
+ /* Stage3pair_print_end will call Filestring_set_split_output() based on resulttype */
+
+ /* First end */
+ Stage3pair_print_end(fp,*fp_failedinput_1,result,resulttype,'>',/*firstp*/true,chromosome_iit,
+ /*queryseq*/queryseq1,/*headerseq1*/queryseq1,/*headerseq2*/queryseq2,
+ maxpaths_report,quiet_if_excessive_p,invert_first_p,quality_shift);
+
+ /* Second end */
+ Stage3pair_print_end(fp,*fp_failedinput_2,result,resulttype,'<',/*firstp*/false,chromosome_iit,
+ /*queryseq*/queryseq2,/*headerseq1*/queryseq1,/*headerseq2*/queryseq2,
+ maxpaths_report,quiet_if_excessive_p,invert_second_p,quality_shift);
+ }
+ }
+
+ return fp;
+}
+/* Output_filestring_fromresult (GSNAP build): returns the Filestring_T
+   produced by filestring_fromresult_sam when output_sam_p is true, and by
+   filestring_fromresult_gsnap otherwise.  The two failed-input
+   out-parameters are forwarded unchanged (&(*p) is the same pointer as
+   p). */
+Filestring_T
+Output_filestring_fromresult (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+ Result_T result, Request_T request) {
+ if (output_sam_p == true) {
+ return filestring_fromresult_sam(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+ } else {
+ return filestring_fromresult_gsnap(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+ }
+}
+
+#else
+/************************************************************************
+ * Print routines and threads for GMAP
+ ************************************************************************/
+/* print_npaths: writes the "Paths (N):" line into fp.  N is 0 when npaths
+   is 0, 1 when mergedp is true, and npaths otherwise.  Appends the
+   chrsubset name when one is given, then either the chimera description
+   (failuretype is NO_FAILURE and chimera is non-NULL) or the message for
+   one of the four failure types tested below, and a newline.  When npaths
+   is 0 a second newline is written. */
+static void
+print_npaths (Filestring_T fp, int npaths, char *chrsubset_name, bool mergedp,
+ Chimera_T chimera, Failure_T failuretype) {
+
+ if (npaths == 0) {
+ FPRINTF(fp,"Paths (0):");
+ } else if (mergedp == true) {
+ FPRINTF(fp,"Paths (1):");
+ } else {
+ FPRINTF(fp,"Paths (%d):",npaths);
+ }
+ if (chrsubset_name != NULL) {
+ FPRINTF(fp," [chrsubset: %s]",chrsubset_name);
+ }
+ if (failuretype == NO_FAILURE) {
+ if (chimera != NULL) {
+ Chimera_print(fp,chimera);
+ }
+ } else if (failuretype == EMPTY_SEQUENCE) {
+ FPRINTF(fp," *** Empty sequence ***");
+ } else if (failuretype == SHORT_SEQUENCE) {
+ FPRINTF(fp," *** Short sequence < index oligo size ***");
+ } else if (failuretype == POOR_SEQUENCE) {
+ FPRINTF(fp," *** Poor sequence (use -p flag to change pruning behavior) ***");
+ } else if (failuretype == REPETITIVE) {
+ FPRINTF(fp," *** Repetitive sequence (use -p flag to change pruning behavior) ***");
+ }
+ FPRINTF(fp,"\n");
+ if (npaths == 0) {
+ FPRINTF(fp,"\n");
+ }
+ return;
+}
+
+
+/* Taken from Outbuffer_print_result */
+Filestring_T
+Output_filestring_fromresult (Filestring_T *fp_failedinput, Result_T result, Request_T request,
+ Sequence_T headerseq) {
+ Filestring_T fp;
+ char *abbrev;
+ Sequence_T queryseq;
+ Stage3_T *stage3array;
+ int npaths, pathnum, effective_maxpaths, first_absmq, second_absmq;
+ Chimera_T chimera = NULL;
+ int chimerapos, chimeraequivpos, chimera_cdna_direction;
+ int querylength;
+ double donor_prob, acceptor_prob;
+ bool mergedp = false;
+ bool printp = true;
+
+ fp = Filestring_new(Request_id(request));
+ if (failedinput_root == NULL) {
+ *fp_failedinput = (Filestring_T) NULL;
+ } else {
+ *fp_failedinput = Filestring_new(Request_id(request));
+ }
+
+ queryseq = Request_queryseq(request);
+ querylength = Sequence_fulllength_given(queryseq);
+
+ stage3array = Result_array(&npaths,&first_absmq,&second_absmq,result);
+
+ chimerapos = chimeraequivpos = -1;
+ chimera_cdna_direction = 0;
+ donor_prob = acceptor_prob = 0.0;
+
+ /* Translation */
+ if (npaths == 0) {
+ Filestring_set_split_output(fp,OUTPUT_NM);
+ abbrev = ABBREV_NOMAPPING_1;
+ effective_maxpaths = 0;
+ if (nofailsp == true) {
+ printp = false;
+ }
+
+ if (Result_failuretype(result) == POOR_SEQUENCE) {
+ fprintf(stderr,"Accession %s skipped (poor sequence). Use -p flag to change pruning behavior\n",Sequence_accession(headerseq));
+ } else if (Result_failuretype(result) == REPETITIVE) {
+ fprintf(stderr,"Accession %s skipped (repetitive sequence). Use -p flag to change pruning behavior\n",Sequence_accession(headerseq));
+ } else {
+ fprintf(stderr,"No paths found for %s\n",Sequence_accession(headerseq));
+ }
+
+ } else if ((mergedp = Result_mergedp(result)) == true) {
+ if (Stage3_circularpos(stage3array[0]) > 0) {
+ Filestring_set_split_output(fp,OUTPUT_UC);
+ abbrev = ABBREV_UNPAIRED_CIRCULAR;
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_UU);
+ abbrev = ABBREV_UNPAIRED_UNIQ;
+ }
+ effective_maxpaths = 1;
+ if (failsonlyp == true) {
+ printp = false;
+ } else {
+ Stage3_translate(stage3array[0],
+#ifdef PMAP
+ queryseq,
+#endif
+ querylength,fulllengthp,cds_startpos,truncatep,strictp);
+ }
+
+ } else if ((chimera = Result_chimera(result)) != NULL) {
+ if (chimeras_allowed_p == true) {
+ effective_maxpaths = 2;
+ } else {
+ effective_maxpaths = 0;
+ }
+ Filestring_set_split_output(fp,OUTPUT_UT);
+ abbrev = ABBREV_UNPAIRED_TRANSLOC;
+
+ if (failsonlyp == true) {
+ printp = false;
+ } else {
+ chimerapos = Chimera_pos(chimera);
+ chimeraequivpos = Chimera_equivpos(chimera);
+ donor_prob = Chimera_donor_prob(chimera);
+ acceptor_prob = Chimera_acceptor_prob(chimera);
+ chimera_cdna_direction = Chimera_cdna_direction(chimera);
+
+ Stage3_translate_chimera(stage3array[0],stage3array[1],
+#ifdef PMAP
+ queryseq,
+#endif
+ querylength,fulllengthp,cds_startpos,truncatep,strictp);
+ }
+
+ } else if (maxpaths_report == 0) {
+ effective_maxpaths = 1;
+ if (npaths > 1) {
+ Filestring_set_split_output(fp,OUTPUT_UM);
+ abbrev = ABBREV_UNPAIRED_MULT;
+ } else if (Stage3_circularpos(stage3array[0]) > 0) {
+ Filestring_set_split_output(fp,OUTPUT_UC);
+ abbrev = ABBREV_UNPAIRED_CIRCULAR;
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_UU);
+ abbrev = ABBREV_UNPAIRED_UNIQ;
+ }
+
+ if (failsonlyp == true) {
+ printp = false;
+ } else if (quiet_if_excessive_p && npaths > maxpaths_report) {
+ printp = false;
+ } else {
+ Stage3_translate(stage3array[0],
+#ifdef PMAP
+ queryseq,
+#endif
+ querylength,fulllengthp,cds_startpos,truncatep,strictp);
+ }
+
+ } else {
+ if (npaths > 1) {
+ Filestring_set_split_output(fp,OUTPUT_UM);
+ abbrev = ABBREV_UNPAIRED_MULT;
+ } else if (Stage3_circularpos(stage3array[0]) > 0) {
+ Filestring_set_split_output(fp,OUTPUT_UC);
+ abbrev = ABBREV_UNPAIRED_CIRCULAR;
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_UU);
+ abbrev = ABBREV_UNPAIRED_UNIQ;
+ }
+
+ if (npaths < maxpaths_report) {
+ effective_maxpaths = npaths;
+ } else {
+ effective_maxpaths = maxpaths_report;
+ }
+
+ if (failsonlyp == true) {
+ printp = false;
+ } else if (quiet_if_excessive_p && npaths > maxpaths_report) {
+ printp = false;
+ } else {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_translate(stage3array[pathnum-1],
+#ifdef PMAP
+ queryseq,
+#endif
+ querylength,fulllengthp,cds_startpos,truncatep,strictp);
+ }
+ }
+ }
+
+ /* Printing */
+ if (printp == false) {
+ /* No output, either because of --nofails or --quiet-if-excessive */
+
+ } else {
+ if (*fp_failedinput != NULL &&
+ (npaths == 0 && quiet_if_excessive_p && npaths > maxpaths_report)) {
+ PUTC('>',*fp_failedinput);
+ Sequence_print_header(*fp_failedinput,headerseq,checksump);
+ Sequence_print(*fp_failedinput,queryseq,/*uppercasep*/false,wraplength,/*trimmedp*/false);
+ }
+
+ if (printtype == SIMPLE || printtype == SUMMARY || printtype == ALIGNMENT) {
+ /* Print header, even if no alignment is found */
+ PUTC('>',fp);
+ Sequence_print_header(fp,headerseq,checksump);
+
+ if (npaths == 0) {
+ print_npaths(fp,0,chrsubset_name,
+ /*mergedp*/false,/*chimera*/NULL,Result_failuretype(result));
+
+
+ } else {
+ print_npaths(fp,npaths,chrsubset_name,mergedp,chimera,NO_FAILURE);
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_print_pathsummary(fp,stage3array[pathnum-1],pathnum,
+ chromosome_iit,contig_iit,
+ altstrain_iit,queryseq,dbversion,/*maxmutations*/1000000);
+ }
+ }
+
+ if (printtype != SIMPLE) {
+ FPRINTF(fp,"Alignments:\n");
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ FPRINTF(fp," Alignment for path %d:\n\n",pathnum);
+ Stage3_print_alignment(fp,stage3array[pathnum-1],
+ genome,chromosome_iit,printtype,
+ /*continuousp*/false,/*continuous_by_exon_p*/false,
+ /*flipgenomep*/true,invertmode,nointronlenp,wraplength);
+ }
+ }
+
+ if (map_iit != NULL) {
+ FPRINTF(fp,"Maps:\n");
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_print_map(fp,stage3array[pathnum-1],map_iit,map_divint_crosstable,
+ chromosome_iit,pathnum,map_exons_p,map_bothstrands_p,
+ nflanking,print_comment_p);
+ }
+ }
+
+ } else if (printtype == COMPRESSED) {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_print_compressed(fp,stage3array[pathnum-1],queryseq,chromosome_iit,
+ dbversion,usersegment,pathnum,npaths,
+ checksump,chimerapos,chimeraequivpos,
+ donor_prob,acceptor_prob,chimera_cdna_direction);
+ }
+
+ } else if (printtype == CONTINUOUS) {
+ PUTC('>',fp);
+ Sequence_print_header(fp,headerseq,checksump);
+ if (npaths == 0) {
+ FPRINTF(fp,"\n\n\n");
+ } else {
+ Stage3_print_alignment(fp,stage3array[0],genome,chromosome_iit,printtype,
+ /*continuousp*/true,/*continuous_by_exon_p*/false,
+ /*flipgenomep*/true,invertmode,nointronlenp,wraplength);
+ }
+
+ } else if (printtype == CONTINUOUS_BY_EXON) {
+ PUTC('>',fp);
+ Sequence_print_header(fp,headerseq,checksump);
+ print_npaths(fp,npaths,chrsubset_name,mergedp,chimera,NO_FAILURE);
+ if (npaths == 0) {
+ FPRINTF(fp,"\n\n\n");
+ } else {
+ Stage3_print_pathsummary(fp,stage3array[0],/*pathnum*/1,
+ chromosome_iit,contig_iit,
+ altstrain_iit,queryseq,
+ dbversion,/*maxmutations*/1000000);
+ FPRINTF(fp,"Alignments:\n");
+ FPRINTF(fp," Alignment for path %d:\n\n",/*pathnum*/1);
+ Stage3_print_alignment(fp,stage3array[0],genome,chromosome_iit,printtype,
+ /*continuousp*/false,/*continuous_by_exon_p*/true,
+ /*flipgenomep*/true,invertmode,nointronlenp,wraplength);
+ }
+
+ } else if (printtype == EXONS_CDNA) {
+ PUTC('>',fp);
+ Sequence_print_header(fp,headerseq,checksump);
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ FPRINTF(fp,"<path %d>\n",pathnum);
+ Pair_print_exons(fp,Stage3_pairarray(stage3array[0]),Stage3_npairs(stage3array[0]),
+ wraplength,ngap,/*cdna*/true);
+ FPRINTF(fp,"</path>\n");
+ }
+
+ } else if (printtype == EXONS_GENOMIC) {
+ PUTC('>',fp);
+ Sequence_print_header(fp,headerseq,checksump);
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ FPRINTF(fp,"<path %d>\n",pathnum);
+ Pair_print_exons(fp,Stage3_pairarray(stage3array[0]),Stage3_npairs(stage3array[0]),
+ wraplength,ngap,/*cdna*/false);
+ FPRINTF(fp,"</path>\n");
+ }
+
+ } else if (printtype == CDNA) {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ PUTC('>',fp);
+ Sequence_print_header(fp,headerseq,checksump);
+ Stage3_print_cdna(fp,stage3array[pathnum-1],wraplength);
+ }
+
+ } else if (printtype == PROTEIN_GENOMIC) {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ PUTC('>',fp);
+ Sequence_print_header(fp,headerseq,checksump);
+ Stage3_print_protein_genomic(fp,stage3array[pathnum-1],wraplength);
+ }
+
+ } else if (printtype == PSL_NT) {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_print_pslformat_nt(fp,stage3array[pathnum-1],
+ chromosome_iit,usersegment,queryseq);
+ }
+
+#ifdef PMAP
+ } else if (printtype == PSL_PRO) {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_print_pslformat_pro(fp,stage3array[pathnum-1],
+ chromosome_iit,usersegment,queryseq,strictp);
+ }
+#endif
+
+ } else if (printtype == GFF3_GENE || printtype == GFF3_MATCH_CDNA ||
+ printtype == GFF3_MATCH_EST) {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_print_gff3(fp,stage3array[pathnum-1],pathnum,
+ chromosome_iit,usersegment,queryseq,querylength,printtype,
+ /*sourcename*/usersegment ? user_genomicseg : dbversion);
+ }
+
+#ifndef PMAP
+ } else if (printtype == SAM) {
+ if (npaths == 0) {
+ Pair_print_sam_nomapping(fp,abbrev,/*acc1*/Sequence_accession(headerseq),/*acc2*/NULL,
+ Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
+ Sequence_fulllength(queryseq),quality_shift,
+ Sequence_firstp(queryseq),sam_paired_p,sam_read_group_id);
+
+ } else if (quiet_if_excessive_p && npaths > maxpaths_report) {
+ Pair_print_sam_nomapping(fp,abbrev,/*acc1*/Sequence_accession(headerseq),/*acc2*/NULL,
+ Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
+ Sequence_fulllength(queryseq),quality_shift,
+ Sequence_firstp(queryseq),sam_paired_p,sam_read_group_id);
+
+ } else if (mergedp == true) {
+ Stage3_print_sam(fp,abbrev,stage3array[0],/*pathnum*/1,/*npaths*/1,
+ Stage3_absmq_score(stage3array[0]),first_absmq,second_absmq,
+ Stage3_mapq_score(stage3array[0]),
+ chromosome_iit,usersegment,queryseq,
+ /*chimera_part*/0,/*chimera*/NULL,quality_shift,sam_paired_p,
+ sam_read_group_id);
+
+ } else if (chimera != NULL) {
+ Stage3_print_sam(fp,abbrev,stage3array[0],/*pathnum*/1,npaths,
+ Stage3_absmq_score(stage3array[0]),first_absmq,second_absmq,
+ Stage3_mapq_score(stage3array[0]),
+ chromosome_iit,usersegment,queryseq,
+ /*chimera_part*/-1,chimera,quality_shift,sam_paired_p,
+ sam_read_group_id);
+ Stage3_print_sam(fp,abbrev,stage3array[1],/*pathnum*/1,npaths,
+ Stage3_absmq_score(stage3array[0]),first_absmq,second_absmq,
+ Stage3_mapq_score(stage3array[0]),
+ chromosome_iit,usersegment,queryseq,
+ /*chimera_part*/+1,chimera,quality_shift,sam_paired_p,
+ sam_read_group_id);
+
+ } else {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_print_sam(fp,abbrev,stage3array[pathnum-1],pathnum,npaths,
+ Stage3_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq,
+ Stage3_mapq_score(stage3array[pathnum-1]),
+ chromosome_iit,usersegment,queryseq,
+ /*chimera_part*/0,/*chimera*/NULL,quality_shift,sam_paired_p,
+ sam_read_group_id);
+ }
+ }
+#endif
+
+ } else if (printtype == COORDS) {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ FPRINTF(fp,">");
+ Sequence_print_header(fp,headerseq,checksump);
+ Stage3_print_coordinates(fp,stage3array[pathnum-1],chromosome_iit,invertmode);
+ }
+
+ } else if (printtype == SPLICESITES) {
+ /* Print only best path */
+ if (npaths > 0) {
+ Stage3_print_splicesites(fp,stage3array[0],chromosome_iit,queryseq);
+ }
+
+ } else if (printtype == INTRONS) {
+ /* Print only best path */
+ if (npaths > 0) {
+ Stage3_print_introns(fp,stage3array[0],chromosome_iit,queryseq);
+ }
+
+ } else if (printtype == MAP_RANGES) {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_print_iit_map(fp,stage3array[pathnum-1],chromosome_iit,queryseq);
+ }
+
+ } else if (printtype == MAP_EXONS) {
+ for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+ Stage3_print_iit_exon_map(fp,stage3array[pathnum-1],chromosome_iit,queryseq);
+ }
+
+ } else {
+ fprintf(stderr,"Unexpected printtype %d\n",printtype);
+ abort();
+
+ }
+ }
+
+ return fp;
+}
+
+#endif
+
+
diff --git a/src/output.h b/src/output.h
new file mode 100644
index 0000000..36ded05
--- /dev/null
+++ b/src/output.h
@@ -0,0 +1,57 @@
+/* $Id: output.h 155282 2014-12-12 19:42:54Z twu $ */
+#ifndef OUTPUT_INCLUDED
+#define OUTPUT_INCLUDED
+
+#include "types.h"
+#include "bool.h"
+#include "genomicpos.h"
+#include "iit-read-univ.h"
+#include "iit-read.h"
+#include "filestring.h"
+
+#include "request.h"
+#include "mem.h" /* To get MEMUSAGE */
+#include "stage3.h" /* Has Printtype_T */
+
+#ifdef GSNAP
+#include "resulthr.h"
+#else
+#include "result.h"
+#include "sequence.h"
+#endif
+
+
+extern void
+Output_setup (Univ_IIT_T chromosome_iit_in,
+ bool nofailsp_in, bool failsonlyp_in, bool quiet_if_excessive_p_in, int maxpaths_report_in,
+ char *failedinput_root_in, int quality_shift_in,
+#ifdef GSNAP
+ bool output_sam_p_in, bool print_m8_p_in, bool invert_first_p_in, bool invert_second_p_in,
+ bool merge_samechr_p_in,
+#else
+ Printtype_T printtype_in, int invertmode_in, int wraplength_in, int ngap_in,
+ bool nointronlenp_in, bool sam_paired_p_in, int cds_startpos_in,
+ bool fulllengthp_in, bool truncatep_in, bool strictp_in, bool checksump_in,
+
+ Genome_T genome_in, Sequence_T usersegment_in, char *user_genomicseg_in,
+ char *dbversion_in, char *chrsubset_name_in,
+ Univ_IIT_T contig_iit_in, IIT_T altstrain_iit_in, bool chimeras_allowed_p_in,
+ IIT_T map_iit_in, int *map_divint_crosstable_in, bool map_exons_p_in,
+ bool map_bothstrands_p_in, int nflanking_in, bool print_comment_p_in,
+#endif
+ char *sam_read_group_id_in);
+
+
+#ifdef GSNAP
+extern Filestring_T
+Output_filestring_fromresult (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+ Result_T result, Request_T request);
+#else
+extern Filestring_T
+Output_filestring_fromresult (Filestring_T *fp_failedinput, Result_T result, Request_T request,
+ Sequence_T headerseq);
+#endif
+
+
+#endif
+
diff --git a/src/pair.c b/src/pair.c
index b20ead6..6cc0bf1 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 161635 2015-03-21 20:17:29Z twu $";
+static char rcsid[] = "$Id: pair.c 166981 2015-06-06 15:53:43Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -33,6 +33,7 @@ static char rcsid[] = "$Id: pair.c 161635 2015-03-21 20:17:29Z twu $";
#include "maxent_hr.h"
#include "mapq.h"
+
#ifndef PMAP
#include "substring.h" /* For Endtype_T */
#include "stage3hr.h"
@@ -48,8 +49,6 @@ static char rcsid[] = "$Id: pair.c 161635 2015-03-21 20:17:29Z twu $";
#define MIN_INTRONLEN 20 /* For deciding between N and D in cigar string */
-
-
/* Check for ANSI mode, which does not include rint */
#ifdef __STRICT_ANSI__
#define rint(x) floor(0.5+(x))
@@ -57,6 +56,8 @@ static char rcsid[] = "$Id: pair.c 161635 2015-03-21 20:17:29Z twu $";
#define DEFAULT_MARGIN 14
+/* #define DIAGNOSTICP 1 */
+
#ifdef DEBUG
#define debug(x) x
#else
@@ -129,15 +130,15 @@ static bool sam_insert_0M_p = false;
static bool force_xs_direction_p;
static bool md_lowercase_variant_p;
static bool snps_p;
+static bool print_nsnpdiffs_p;
static double genomelength; /* For BLAST E-value */
-static Cigar_action_T cigar_action;
void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
- bool md_lowercase_variant_p_in, bool snps_p_in, Univcoord_T genomelength_in,
- Cigar_action_T cigar_action_in) {
+ bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
+ Univcoord_T genomelength_in) {
trim_mismatch_score = trim_mismatch_score_in;
trim_indel_score = trim_indel_score_in;
gff3_separators_p = gff3_separators_p_in;
@@ -145,8 +146,8 @@ Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
force_xs_direction_p = force_xs_direction_p_in;
md_lowercase_variant_p = md_lowercase_variant_p_in;
snps_p = snps_p_in;
+ print_nsnpdiffs_p = print_nsnpdiffs_p_in;
genomelength = (double) genomelength_in;
- cigar_action = cigar_action_in;
return;
}
@@ -462,7 +463,6 @@ Pair_protect_list (List_T pairs) {
}
-
/* For output thread only. Pairs needed by worker threads are made in pairpool.c */
T
Pair_new_out (int querypos, Chrpos_T genomepos, char cdna, char comp, char genome) {
@@ -520,12 +520,12 @@ Pair_free_out (T *old) {
static char *RULER = " . : . : . : . : . :";
static void
-print_top_ruler (FILE *fp, int n, int npairs, int margin, int wraplength) {
- fprintf(fp,"%*d ",margin,n);
+print_top_ruler (Filestring_T fp, int n, int npairs, int margin, int wraplength) {
+ FPRINTF(fp,"%*d ",margin,n);
if (n + wraplength < npairs) {
- fprintf(fp,"%s\n",RULER);
+ FPRINTF(fp,"%s\n",RULER);
} else {
- fprintf(fp,"%.*s\n",npairs-n,RULER);
+ FPRINTF(fp,"%.*s\n",npairs-n,RULER);
}
return;
}
@@ -545,17 +545,17 @@ print_bottom_ruler (int n, int npairs, int margin, int wraplength) {
static void
-print_cdna_sequence (FILE *fp, struct T *ptr, int n, int npairs, int margin, int wraplength) {
+print_cdna_sequence (Filestring_T fp, struct T *ptr, int n, int npairs, int margin, int wraplength) {
struct T *this;
int i;
this = ptr;
- fprintf(fp,"%*u ",margin,this->querypos + ONEBASEDP);
+ FPRINTF(fp,"%*u ",margin,this->querypos + ONEBASEDP);
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
- putc(this->cdna,fp);
+ PUTC(this->cdna,fp);
}
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
@@ -584,88 +584,91 @@ find_aapos_in_line (struct T *ptr, int n, int npairs, int wraplength,
static void
-print_peptide (FILE *fp, struct T *ptr, int n, int npairs, int margin,
+print_peptide (Filestring_T fp, struct T *ptr, int n, int npairs, int margin,
int wraplength, bool genomep) {
struct T *this;
int aapos, i;
if ((aapos = find_aapos_in_line(ptr,n,npairs,wraplength,genomep)) < 0) {
- fprintf(fp,"%*s ",margin,"");
+ FPRINTF(fp,"%*s ",margin,"");
} else {
/* 4 is length of "aa.c" and "aa.g" */
if (genomep == true) {
- fprintf(fp,"aa.g%*d ",margin-4,aapos);
+ FPRINTF(fp,"aa.g%*d ",margin-4,aapos);
} else {
- fprintf(fp,"aa.c%*d ",margin-4,aapos);
+ FPRINTF(fp,"aa.c%*d ",margin-4,aapos);
}
}
if (genomep == true) {
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
- putc(this->aa_g,fp);
+ PUTC(this->aa_g,fp);
}
} else {
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
- putc(this->aa_e,fp);
+ PUTC(this->aa_e,fp);
}
}
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
static void
-print_alignment (FILE *fp, struct T *ptr, int n, int npairs, bool diagnosticp,
+print_alignment (Filestring_T fp, struct T *ptr, int n, int npairs,
int margin, int wraplength) {
struct T *this;
int i;
- fprintf(fp,"%*s ",margin,"");
+ FPRINTF(fp,"%*s ",margin,"");
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
- if (diagnosticp == true) {
- /* Subtract 1 because dynprogindices start at +1 and -1 */
- if (this->comp == DYNPROG_MATCH_COMP) {
- if (this->dynprogindex > 0) {
- fprintf(fp,"%c",(this->dynprogindex-1)%26+'a');
- } else if (this->dynprogindex < 0) {
- fprintf(fp,"%c",(-this->dynprogindex-1)%26+'A');
- } else {
- putc(DYNPROG_MATCH_COMP,fp);
- }
- } else if (this->shortexonp == true) {
- putc(DIAGNOSTIC_SHORTEXON_COMP,fp);
+#ifdef DIAGNOSTICP
+ /* Subtract 1 because dynprogindices start at +1 and -1 */
+ if (this->comp == DYNPROG_MATCH_COMP) {
+ if (this->dynprogindex > 0) {
+ FPRINTF(fp,"%c",(this->dynprogindex-1)%26+'a');
+ } else if (this->dynprogindex < 0) {
+ FPRINTF(fp,"%c",(-this->dynprogindex-1)%26+'A');
} else {
- putc(this->comp,fp);
+ PUTC(DYNPROG_MATCH_COMP,fp);
}
+ } else if (this->shortexonp == true) {
+ PUTC(DIAGNOSTIC_SHORTEXON_COMP,fp);
+ } else {
+ PUTC(this->comp,fp);
+ }
- } else if (this->comp == DYNPROG_MATCH_COMP) {
- putc(MATCH_COMP,fp);
+#else
+ if (this->comp == DYNPROG_MATCH_COMP) {
+ PUTC(MATCH_COMP,fp);
} else if (this->comp == AMBIGUOUS_COMP) {
#ifdef PMAP
- putc(AMBIGUOUS_COMP,fp);
+ PUTC(AMBIGUOUS_COMP,fp);
#else
- putc(MISMATCH_COMP,fp);
+ PUTC(MISMATCH_COMP,fp);
#endif
} else if (this->comp == SHORTGAP_COMP) {
- putc(INDEL_COMP,fp);
+ PUTC(INDEL_COMP,fp);
} else if (this->comp == EXTRAEXON_COMP) {
- putc(INTRONGAP_COMP,fp);
+ PUTC(INTRONGAP_COMP,fp);
} else {
- putc(this->comp,fp);
+ PUTC(this->comp,fp);
}
+#endif
+
}
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
static void
-print_genomic_sequence (FILE *fp, struct T *ptr, int n, int npairs,
+print_genomic_sequence (Filestring_T fp, struct T *ptr, int n, int npairs,
char *chrstring, Univcoord_T chroffset,
int margin, int wraplength) {
struct T *this;
@@ -678,21 +681,21 @@ print_genomic_sequence (FILE *fp, struct T *ptr, int n, int npairs,
} else {
sprintf(Buffer,"%s:%u",chrstring,this->genomepos + ONEBASEDP);
}
- fprintf(fp,"%*s ",margin,Buffer);
+ FPRINTF(fp,"%*s ",margin,Buffer);
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
if (this->comp == EXTRAEXON_COMP) {
- putc(INTRONGAP_CHAR,fp);
+ PUTC(INTRONGAP_CHAR,fp);
} else {
- putc(this->genome,fp);
+ PUTC(this->genome,fp);
}
}
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
static void
-print_genomicalt_sequence (FILE *fp, struct T *ptr, int n, int npairs,
+print_genomicalt_sequence (Filestring_T fp, struct T *ptr, int n, int npairs,
char *chrstring, Univcoord_T chroffset,
int margin, int wraplength) {
struct T *this;
@@ -705,18 +708,18 @@ print_genomicalt_sequence (FILE *fp, struct T *ptr, int n, int npairs,
} else {
sprintf(Buffer,"%s:%u",chrstring,this->genomepos + ONEBASEDP);
}
- fprintf(fp,"%*s ",margin,Buffer);
+ FPRINTF(fp,"%*s ",margin,Buffer);
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
if (this->comp == EXTRAEXON_COMP) {
- putc(INTRONGAP_CHAR,fp);
+ PUTC(INTRONGAP_CHAR,fp);
} else if (this->genomealt == this->genome) {
- putc(' ',fp);
+ PUTC(' ',fp);
} else {
- putc(this->genomealt,fp);
+ PUTC(this->genomealt,fp);
}
}
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
@@ -919,9 +922,8 @@ Pair_translation_length (struct T *pairs, int npairs) {
void
-Pair_print_continuous (FILE *fp, struct T *pairs, int npairs, bool watsonp,
- bool diagnosticp, bool genomefirstp, int invertmode,
- bool nointronlenp) {
+Pair_print_continuous (Filestring_T fp, struct T *pairs, int npairs, bool watsonp,
+ bool genomefirstp, int invertmode, bool nointronlenp) {
T this;
struct T *save = NULL, *ptr;
int n = 0;
@@ -946,69 +948,80 @@ Pair_print_continuous (FILE *fp, struct T *pairs, int npairs, bool watsonp,
ptr = pairs;
for (n = 0; n < npairs; n++) {
this = ptr++;
- putc(this->genome,fp);
+ PUTC(this->genome,fp);
}
- putc('\n',fp);
+ PUTC('\n',fp);
ptr = pairs;
for (n = 0; n < npairs; n++) {
this = ptr++;
+#ifdef DIAGNOSTICP
+ PUTC(this->comp,fp);
+#else
if (this->comp == MATCH_COMP) {
- putc(MATCH_COMP,fp);
- } else if (diagnosticp == false && this->comp == DYNPROG_MATCH_COMP) {
- putc(MATCH_COMP,fp);
- } else if (diagnosticp == false && this->comp == AMBIGUOUS_COMP) {
+ PUTC(MATCH_COMP,fp);
+ } else if (this->comp == DYNPROG_MATCH_COMP) {
+ PUTC(MATCH_COMP,fp);
+ } else if (this->comp == AMBIGUOUS_COMP) {
#ifdef PMAP
- putc(AMBIGUOUS_COMP,fp);
+ PUTC(AMBIGUOUS_COMP,fp);
#else
- putc(MISMATCH_COMP,fp);
+ PUTC(MISMATCH_COMP,fp);
#endif
} else {
- putc(this->comp,fp);
+ PUTC(this->comp,fp);
}
+#endif
+
}
- putc('\n',fp);
+ PUTC('\n',fp);
ptr = pairs;
for (n = 0; n < npairs; n++) {
this = ptr++;
- putc(this->cdna,fp);
+ PUTC(this->cdna,fp);
}
- putc('\n',fp);
+ PUTC('\n',fp);
} else {
ptr = pairs;
for (n = 0; n < npairs; n++) {
this = ptr++;
- putc(this->cdna,fp);
+ PUTC(this->cdna,fp);
}
- putc('\n',fp);
+ PUTC('\n',fp);
ptr = pairs;
for (n = 0; n < npairs; n++) {
this = ptr++;
+
+#ifdef DIAGNOSTICP
+ PUTC(this->comp,fp);
+#else
if (this->comp == MATCH_COMP) {
- putc(MATCH_COMP,fp);
- } else if (diagnosticp == false && this->comp == DYNPROG_MATCH_COMP) {
- putc(MATCH_COMP,fp);
- } else if (diagnosticp == false && this->comp == AMBIGUOUS_COMP) {
+ PUTC(MATCH_COMP,fp);
+ } else if (this->comp == DYNPROG_MATCH_COMP) {
+ PUTC(MATCH_COMP,fp);
+ } else if (this->comp == AMBIGUOUS_COMP) {
#ifdef PMAP
- putc(AMBIGUOUS_COMP,fp);
+ PUTC(AMBIGUOUS_COMP,fp);
#else
- putc(MISMATCH_COMP,fp);
+ PUTC(MISMATCH_COMP,fp);
#endif
} else {
- putc(this->comp,fp);
+ PUTC(this->comp,fp);
}
+#endif
+
}
- putc('\n',fp);
+ PUTC('\n',fp);
ptr = pairs;
for (n = 0; n < npairs; n++) {
this = ptr++;
- putc(this->genome,fp);
+ PUTC(this->genome,fp);
}
- putc('\n',fp);
+ PUTC('\n',fp);
}
if (save != NULL) {
@@ -1020,7 +1033,7 @@ Pair_print_continuous (FILE *fp, struct T *pairs, int npairs, bool watsonp,
void
-Pair_print_continuous_byexon (FILE *fp, struct T *pairs, int npairs, bool watsonp, bool diagnosticp, int invertmode) {
+Pair_print_continuous_byexon (Filestring_T fp, struct T *pairs, int npairs, bool watsonp, int invertmode) {
T this;
struct T *save = NULL, *ptr;
int i = 0, j;
@@ -1044,41 +1057,48 @@ Pair_print_continuous_byexon (FILE *fp, struct T *pairs, int npairs, bool watson
this = ptr;
while (j < npairs && this->gapp == false) {
- putc(this->genome,fp);
+ PUTC(this->genome,fp);
this++;
j++;
}
- putc('\n',fp);
+ PUTC('\n',fp);
j = i;
this = ptr;
while (j < npairs && this->gapp == false) {
+
+#ifdef DIAGNOSTICP
+ PUTC(this->comp,fp);
+
+#else
if (this->comp == MATCH_COMP) {
- putc(MATCH_COMP,fp);
- } else if (diagnosticp == false && this->comp == DYNPROG_MATCH_COMP) {
- putc(MATCH_COMP,fp);
- } else if (diagnosticp == false && this->comp == AMBIGUOUS_COMP) {
+ PUTC(MATCH_COMP,fp);
+ } else if (this->comp == DYNPROG_MATCH_COMP) {
+ PUTC(MATCH_COMP,fp);
+ } else if (this->comp == AMBIGUOUS_COMP) {
#ifdef PMAP
- putc(AMBIGUOUS_COMP,fp);
+ PUTC(AMBIGUOUS_COMP,fp);
#else
- putc(MISMATCH_COMP,fp);
+ PUTC(MISMATCH_COMP,fp);
#endif
} else {
- putc(this->comp,fp);
+ PUTC(this->comp,fp);
}
+#endif
+
this++;
j++;
}
- putc('\n',fp);
+ PUTC('\n',fp);
j = i;
this = ptr;
while (j < npairs && this->gapp == false) {
- putc(this->cdna,fp);
+ PUTC(this->cdna,fp);
this++;
j++;
}
- fprintf(fp,"\n\n");
+ FPRINTF(fp,"\n\n");
i = j;
while (i < npairs && this->gapp == true) {
@@ -1096,10 +1116,9 @@ Pair_print_continuous_byexon (FILE *fp, struct T *pairs, int npairs, bool watson
void
-Pair_print_alignment (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
+Pair_print_alignment (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
Univcoord_T chroffset, Univ_IIT_T chromosome_iit, bool watsonp,
- bool diagnosticp, int invertmode, bool nointronlenp,
- int wraplength) {
+ int invertmode, bool nointronlenp, int wraplength) {
struct T *save = NULL, *ptr;
int n = 0, i;
char *chrstring = NULL;
@@ -1147,10 +1166,10 @@ Pair_print_alignment (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
}
print_genomic_sequence(fp,ptr,n,npairs,chrstring,
chroffset,margin,wraplength);
- print_alignment(fp,ptr,n,npairs,diagnosticp,margin,wraplength);
+ print_alignment(fp,ptr,n,npairs,margin,wraplength);
print_cdna_sequence(fp,ptr,n,npairs,margin,wraplength);
print_peptide(fp,ptr,n,npairs,margin,wraplength,/*genomep*/false);
- putc('\n',fp);
+ PUTC('\n',fp);
for (i = 0; n < npairs && i < wraplength; n++, i++) {
ptr++;
}
@@ -1165,7 +1184,7 @@ Pair_print_alignment (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
}
void
-Pair_print_pathsummary (FILE *fp, int pathnum, T start, T end, Chrnum_T chrnum,
+Pair_print_pathsummary (Filestring_T fp, int pathnum, T start, T end, Chrnum_T chrnum,
Univcoord_T chroffset, Univ_IIT_T chromosome_iit, bool referencealignp,
IIT_T altstrain_iit, char *strain, Univ_IIT_T contig_iit, char *dbversion,
int querylength_given, int skiplength, int trim_start, int trim_end,
@@ -1173,8 +1192,7 @@ Pair_print_pathsummary (FILE *fp, int pathnum, T start, T end, Chrnum_T chrnum,
int qopens, int qindels, int topens, int tindels, int goodness,
bool watsonp, int cdna_direction,
int translation_start, int translation_end, int translation_length,
- int relaastart, int relaaend, bool maponlyp,
- bool diagnosticp, int stage2_source, int stage2_indexsize) {
+ int relaastart, int relaaend, int stage2_source, int stage2_indexsize) {
int querypos1, querypos2, den;
double fracidentity, coverage, trimmed_coverage;
Univcoord_T position1, position2;
@@ -1184,8 +1202,8 @@ Pair_print_pathsummary (FILE *fp, int pathnum, T start, T end, Chrnum_T chrnum,
querypos1 = start->querypos;
querypos2 = end->querypos;
- fprintf(fp," Path %d: ",pathnum);
- fprintf(fp,"query %d%s%d (%d bp) => ",
+ FPRINTF(fp," Path %d: ",pathnum);
+ FPRINTF(fp,"query %d%s%d (%d bp) => ",
querypos1 + ONEBASEDP,SEPARATOR,querypos2 + ONEBASEDP,querypos2-querypos1+1);
chrpos1 = start->genomepos;
@@ -1195,44 +1213,31 @@ Pair_print_pathsummary (FILE *fp, int pathnum, T start, T end, Chrnum_T chrnum,
comma2 = Genomicpos_commafmt(chrpos2 + ONEBASEDP);
if (chrnum == 0) {
if (watsonp) {
- fprintf(fp,"genome %s%s%s (%d bp)\n",
+ FPRINTF(fp,"genome %s%s%s (%d bp)\n",
comma1,SEPARATOR,comma2,chrpos2-chrpos1+1);
} else {
- fprintf(fp,"genome %s%s%s (%d bp)\n",
+ FPRINTF(fp,"genome %s%s%s (%d bp)\n",
comma1,SEPARATOR,comma2,chrpos2-chrpos1-1);
}
} else {
chr = Chrnum_to_string(chrnum,chromosome_iit);
if (watsonp) {
- fprintf(fp,"genome %s:%s%s%s (%d bp)\n",chr,comma1,SEPARATOR,comma2,chrpos2-chrpos1+1);
+ FPRINTF(fp,"genome %s:%s%s%s (%d bp)\n",chr,comma1,SEPARATOR,comma2,chrpos2-chrpos1+1);
} else {
- fprintf(fp,"genome %s:%s%s%s (%d bp)\n",chr,comma1,SEPARATOR,comma2,chrpos2-chrpos1-1);
+ FPRINTF(fp,"genome %s:%s%s%s (%d bp)\n",chr,comma1,SEPARATOR,comma2,chrpos2-chrpos1-1);
}
FREE(chr);
}
FREE(comma2);
FREE(comma1);
- if (maponlyp == false) {
-
- if (diagnosticp == true) {
- /* fprintf(fp," Stage 2 diag runtime: %.3f sec\n",stage2_diag_runtime); */
- /* fprintf(fp," Stage 2 align runtime: %.3f sec\n",stage2_align_runtime); */
- fprintf(fp," Stage 2 source: %d\n",stage2_source);
- fprintf(fp," Stage 2 indexsize: %d\n",stage2_indexsize);
- /* fprintf(fp," Stage 3 runtime: %.3f sec\n",stage3_runtime); */
- /* fprintf(fp," Stage 3 defectrate: %f\n",stage3_defectrate); */
- fprintf(fp," Goodness: %d\n",goodness);
- }
-
- fprintf(fp," cDNA direction: ");
- if (cdna_direction > 0) {
- fprintf(fp,"sense\n");
- } else if (cdna_direction < 0) {
- fprintf(fp,"antisense\n");
- } else {
- fprintf(fp,"indeterminate\n");
- }
+ FPRINTF(fp," cDNA direction: ");
+ if (cdna_direction > 0) {
+ FPRINTF(fp,"sense\n");
+ } else if (cdna_direction < 0) {
+ FPRINTF(fp,"antisense\n");
+ } else {
+ FPRINTF(fp,"indeterminate\n");
}
if (altstrain_iit != NULL) {
@@ -1240,12 +1245,12 @@ Pair_print_pathsummary (FILE *fp, int pathnum, T start, T end, Chrnum_T chrnum,
refstrain = IIT_typestring(altstrain_iit,/*straintype*/0);
if (refstrain[0] == '\0') {
/* Backward compatibility with old altstrain_iit */
- fprintf(fp," Strain: reference\n");
+ FPRINTF(fp," Strain: reference\n");
} else {
- fprintf(fp," Strain: %s (reference)\n",refstrain);
+ FPRINTF(fp," Strain: %s (reference)\n",refstrain);
}
} else {
- fprintf(fp," Strain: %s\n",strain);
+ FPRINTF(fp," Strain: %s\n",strain);
}
}
@@ -1254,14 +1259,14 @@ Pair_print_pathsummary (FILE *fp, int pathnum, T start, T end, Chrnum_T chrnum,
comma1 = Genomicpos_commafmt(position1 + ONEBASEDP);
comma2 = Genomicpos_commafmt(position2 + ONEBASEDP);
if (dbversion == NULL) {
- fprintf(fp," Genomic pos: %s%s%s",comma1,SEPARATOR,comma2);
+ FPRINTF(fp," Genomic pos: %s%s%s",comma1,SEPARATOR,comma2);
} else {
- fprintf(fp," Genomic pos: %s:%s%s%s",dbversion,comma1,SEPARATOR,comma2);
+ FPRINTF(fp," Genomic pos: %s:%s%s%s",dbversion,comma1,SEPARATOR,comma2);
}
if (chrpos1 <= chrpos2) {
- fprintf(fp," (+ strand)\n");
+ FPRINTF(fp," (+ strand)\n");
} else {
- fprintf(fp," (- strand)\n");
+ FPRINTF(fp," (- strand)\n");
}
FREE(comma2);
FREE(comma1);
@@ -1274,83 +1279,81 @@ Pair_print_pathsummary (FILE *fp, int pathnum, T start, T end, Chrnum_T chrnum,
}
}
- if (maponlyp == false) {
- fprintf(fp," Number of exons: %d\n",nexons);
+ FPRINTF(fp," Number of exons: %d\n",nexons);
#ifdef PMAP
- coverage = (double) (querypos2 - querypos1 + 1)/(double) (3*(querylength_given + skiplength));
- /* coverage = (double) (matches + mismatches + qindels)/(double) (3*(querylength_given + skiplength)); */
+ coverage = (double) (querypos2 - querypos1 + 1)/(double) (3*(querylength_given + skiplength));
+ /* coverage = (double) (matches + mismatches + qindels)/(double) (3*(querylength_given + skiplength)); */
- /* Can have coverage greater than given querylength because of added '*' at end */
- if (coverage > 1.0) {
- coverage = 1.0;
- }
+ /* Can have coverage greater than given querylength because of added '*' at end */
+ if (coverage > 1.0) {
+ coverage = 1.0;
+ }
#else
- /* coverage = (double) (matches + mismatches + qindels)/(double) (querylength_given + skiplength); */
- coverage = (double) (querypos2 - querypos1 + 1)/(double) (querylength_given + skiplength);
+ /* coverage = (double) (matches + mismatches + qindels)/(double) (querylength_given + skiplength); */
+ coverage = (double) (querypos2 - querypos1 + 1)/(double) (querylength_given + skiplength);
#endif
- fprintf(fp," Coverage: %.1f",((double) rint(1000.0*coverage))/10.0);
+ FPRINTF(fp," Coverage: %.1f",((double) rint(1000.0*coverage))/10.0);
#ifdef PMAP
- fprintf(fp," (query length: %d aa)\n",querylength_given);
+ FPRINTF(fp," (query length: %d aa)\n",querylength_given);
#else
- fprintf(fp," (query length: %d bp)\n",querylength_given);
- if (querypos2 + 1 > trim_end) {
- trim_end = querypos2 + 1;
- }
- if (querypos1 < trim_start) {
- trim_start = querypos1;
- }
+ FPRINTF(fp," (query length: %d bp)\n",querylength_given);
+ if (querypos2 + 1 > trim_end) {
+ trim_end = querypos2 + 1;
+ }
+ if (querypos1 < trim_start) {
+ trim_start = querypos1;
+ }
- trimmed_coverage = (double) (querypos2 - querypos1 + 1)/(double) (trim_end - trim_start + skiplength);
- fprintf(fp," Trimmed coverage: %.1f",((double) rint(1000.0*trimmed_coverage))/10.0);
- fprintf(fp," (trimmed length: %d bp, trimmed region: %d..%d)",
- trim_end-trim_start,trim_start+ONEBASEDP,trim_end-1+ONEBASEDP);
- putc('\n',fp);
+ trimmed_coverage = (double) (querypos2 - querypos1 + 1)/(double) (trim_end - trim_start + skiplength);
+ FPRINTF(fp," Trimmed coverage: %.1f",((double) rint(1000.0*trimmed_coverage))/10.0);
+ FPRINTF(fp," (trimmed length: %d bp, trimmed region: %d..%d)",
+ trim_end-trim_start,trim_start+ONEBASEDP,trim_end-1+ONEBASEDP);
+ PUTC('\n',fp);
#endif
- if ((den = matches + mismatches + qindels + tindels) == 0) {
- fracidentity = 1.0;
- } else {
- fracidentity = (double) matches/(double) den;
- }
+ if ((den = matches + mismatches + qindels + tindels) == 0) {
+ fracidentity = 1.0;
+ } else {
+ fracidentity = (double) matches/(double) den;
+ }
- /* The definition of indels here should be consistent with Stage3_indels */
- fprintf(fp," Percent identity: %.1f (%d matches, %d mismatches, %d indels, %d unknowns)\n",
- ((double) rint(1000.0*fracidentity))/10.0,matches,mismatches,qindels+tindels,unknowns);
- if (qindels + tindels > 0) {
- fprintf(fp," Non-intron gaps: %d openings, %d bases in cdna; %d openings, %d bases in genome\n",
- qopens,qindels,topens,tindels);
- }
+ /* The definition of indels here should be consistent with Stage3_indels */
+ FPRINTF(fp," Percent identity: %.1f (%d matches, %d mismatches, %d indels, %d unknowns)\n",
+ ((double) rint(1000.0*fracidentity))/10.0,matches,mismatches,qindels+tindels,unknowns);
+ if (qindels + tindels > 0) {
+ FPRINTF(fp," Non-intron gaps: %d openings, %d bases in cdna; %d openings, %d bases in genome\n",
+ qopens,qindels,topens,tindels);
+ }
#ifndef PMAP
- if (translation_length > 0) {
- if (cdna_direction >= 0) {
- fprintf(fp," Translation: %d..%d (%d aa)\n",
- translation_start+ONEBASEDP,translation_end+ONEBASEDP,translation_length);
- } else {
- fprintf(fp," Translation: %d..%d (%d aa)\n",
- translation_end+ONEBASEDP,translation_start+ONEBASEDP,translation_length);
- }
- } else if (relaastart > 0) {
- if (relaastart < relaaend) {
- fprintf(fp," Protein coords: %d..%d\n",relaastart,relaaend);
- } else {
- fprintf(fp," Protein coords: %d..%d\n",relaaend,relaastart);
- }
+ if (translation_length > 0) {
+ if (cdna_direction >= 0) {
+ FPRINTF(fp," Translation: %d..%d (%d aa)\n",
+ translation_start+ONEBASEDP,translation_end+ONEBASEDP,translation_length);
+ } else {
+ FPRINTF(fp," Translation: %d..%d (%d aa)\n",
+ translation_end+ONEBASEDP,translation_start+ONEBASEDP,translation_length);
+ }
+ } else if (relaastart > 0) {
+ if (relaastart < relaaend) {
+ FPRINTF(fp," Protein coords: %d..%d\n",relaastart,relaaend);
+ } else {
+ FPRINTF(fp," Protein coords: %d..%d\n",relaaend,relaastart);
}
+ }
#endif
- /* fprintf(fp," Defect rate (percent): %.1f\n",defect_rate*100.0); */
+ /* FPRINTF(fp," Defect rate (percent): %.1f\n",defect_rate*100.0); */
- /* putc('\n',fp); -- Done by caller */
- }
+ /* PUTC('\n',fp); -- Done by caller */
return;
}
void
-Pair_print_coordinates (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
+Pair_print_coordinates (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
Univcoord_T chroffset, Univ_IIT_T chromosome_iit,
bool watsonp, int invertmode) {
T this;
@@ -1390,39 +1393,39 @@ Pair_print_coordinates (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
this = pairs++;
if (this->gapp == false) {
#ifdef DEBUG5
- fprintf(fp,"%d %d %c\t",this->aapos,this->aaphase_e,this->aa_e);
+ FPRINTF(fp,"%d %d %c\t",this->aapos,this->aaphase_e,this->aa_e);
#else
if (this->aaphase_e != 0) {
- fprintf(fp,"%d\t",this->aapos);
+ FPRINTF(fp,"%d\t",this->aapos);
} else {
- fprintf(fp,"%d %c\t",this->aapos,this->aa_e);
+ FPRINTF(fp,"%d %c\t",this->aapos,this->aa_e);
}
#endif
- fprintf(fp,"%d %c\t",this->querypos + ONEBASEDP,this->cdna);
+ FPRINTF(fp,"%d %c\t",this->querypos + ONEBASEDP,this->cdna);
if (chrstring == NULL) {
- fprintf(fp,"%u %u %c",this->genomepos + ONEBASEDP,
+ FPRINTF(fp,"%u %u %c",this->genomepos + ONEBASEDP,
chroffset + this->genomepos + ONEBASEDP,
this->genome);
} else {
- fprintf(fp,"%s:%u %u %c",chrstring,
+ FPRINTF(fp,"%s:%u %u %c",chrstring,
this->genomepos + ONEBASEDP,
chroffset + this->genomepos + ONEBASEDP,
this->genome);
}
if (this->genomealt != this->genome) {
- fprintf(fp," %c",this->genomealt);
+ FPRINTF(fp," %c",this->genomealt);
}
#ifdef DEBUG5
- fprintf(fp,"\t%d %c",this->aaphase_g,this->aa_g);
+ FPRINTF(fp,"\t%d %c",this->aaphase_g,this->aa_g);
#else
if (this->aaphase_g != 0) {
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
} else {
- fprintf(fp,"\t%c",this->aa_g);
+ FPRINTF(fp,"\t%c",this->aa_g);
}
#endif
- putc('\n',fp);
+ PUTC('\n',fp);
}
}
@@ -1436,6 +1439,25 @@ Pair_print_coordinates (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
}
+int
+Pair_cmp (const void *a, const void *b) {
+ T x = * (T *) a;
+ T y = * (T *) b;
+
+ if (x->querypos < y->querypos) {
+ return -1;
+ } else if (y->querypos < x->querypos) {
+ return +1;
+ } else if (x->genomepos < y->genomepos) {
+ return -1;
+ } else if (y->genomepos < x->genomepos) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+
void
Pair_dump_one (T this, bool zerobasedp) {
@@ -1492,9 +1514,11 @@ Pair_dump_one (T this, bool zerobasedp) {
printf(" shortexon");
}
+#if 0
if (this->state == BAD) {
printf(" bad");
}
+#endif
return;
}
@@ -1549,6 +1573,39 @@ Pair_dump_array (struct T *pairs, int npairs, bool zerobasedp) {
}
+void
+Pair_dump_array_stderr (struct T *pairs, int npairs, bool zerobasedp) {
+ struct T *this;
+ int i;
+
+ for (i = 0; i < npairs; i++) {
+ this = pairs++;
+ fprintf(stderr,"%d: %d %d %d %c ",
+ i,this->querypos + !zerobasedp,this->genomepos + !zerobasedp,this->aapos,
+ this->cdna);
+
+ /* Subtract 1 because dynprogindices start at +1 and -1 */
+ if (this->dynprogindex > 0) {
+ fprintf(stderr,"%c%c",this->comp,(this->dynprogindex-1)%26+'a');
+ } else if (this->dynprogindex < 0) {
+ fprintf(stderr,"%c%c",this->comp,(-this->dynprogindex-1)%26+'A');
+ } else {
+ putc(this->comp,stderr);
+ }
+ fprintf(stderr," %c",this->genome);
+ if (this->genomealt != this->genome) {
+ fprintf(stderr," alt:%c",this->genomealt);
+ }
+
+ if (this->aaphase_g == 0 || this->aaphase_e == 0) {
+ fprintf(stderr," => %c %c",this->aa_g,this->aa_e);
+ }
+ fprintf(stderr,"\n");
+ }
+ return;
+}
+
+
Chrpos_T
Pair_genomicpos (struct T *pairs, int npairs, int querypos, bool headp) {
struct T *this;
@@ -1708,7 +1765,7 @@ Pair_check_array (struct T *pairs, int npairs) {
/* Called by output thread for --merge-overlap feature. Modeled after Substring_convert_to_pairs. */
List_T
Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength,
- int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset) {
+ int hardclip_low, int hardclip_high, int queryseq_offset) {
T pair;
int querystart, queryend, i;
@@ -1862,7 +1919,7 @@ unknown_base (char c) {
}
void
-Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
+Pair_print_exonsummary (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
Univcoord_T chroffset, Genome_T genome, Univ_IIT_T chromosome_iit,
bool watsonp, int cdna_direction, bool genomefirstp, int invertmode) {
bool in_exon = false;
@@ -1912,50 +1969,50 @@ Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
intron_start = exon_genomeend - 1;
}
if (genomefirstp == true) {
- fprintf(fp," ");
+ FPRINTF(fp," ");
if (chrnum == 0) {
- fprintf(fp,"%u-%u",chroffset+exon_genomestart,chroffset+exon_genomeend);
+ FPRINTF(fp,"%u-%u",chroffset+exon_genomestart,chroffset+exon_genomeend);
} else {
- fprintf(fp,"%s:%d-%d",chrstring,exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"%s:%d-%d",chrstring,exon_genomestart,exon_genomeend);
}
- fprintf(fp," (%d-%d)",exon_querystart,exon_queryend);
+ FPRINTF(fp," (%d-%d)",exon_querystart,exon_queryend);
} else {
- fprintf(fp," %d-%d",exon_querystart,exon_queryend);
- fprintf(fp," ");
+ FPRINTF(fp," %d-%d",exon_querystart,exon_queryend);
+ FPRINTF(fp," ");
if (chrnum == 0) {
- fprintf(fp,"(%u-%u)",chroffset+exon_genomestart,chroffset+exon_genomeend);
+ FPRINTF(fp,"(%u-%u)",chroffset+exon_genomestart,chroffset+exon_genomeend);
} else {
- fprintf(fp,"(%s:%d-%d)",chrstring,exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"(%s:%d-%d)",chrstring,exon_genomestart,exon_genomeend);
}
}
if (den == 0) {
- fprintf(fp," %d%%",100);
+ FPRINTF(fp," %d%%",100);
} else {
- fprintf(fp," %d%%",(int) floor(100.0*(double) num/(double) den));
+ FPRINTF(fp," %d%%",(int) floor(100.0*(double) num/(double) den));
}
if (this->comp == FWD_CANONICAL_INTRON_COMP) {
- fprintf(fp," ->");
+ FPRINTF(fp," ->");
/* sensep = true; */
} else if (this->comp == REV_CANONICAL_INTRON_COMP) {
- fprintf(fp," <-");
+ FPRINTF(fp," <-");
/* sensep = false; */
} else if (this->comp == FWD_GCAG_INTRON_COMP) {
- fprintf(fp," -)");
+ FPRINTF(fp," -)");
/* sensep = true; */
} else if (this->comp == REV_GCAG_INTRON_COMP) {
- fprintf(fp," (-");
+ FPRINTF(fp," (-");
/* sensep = false; */
} else if (this->comp == FWD_ATAC_INTRON_COMP) {
- fprintf(fp," -]");
+ FPRINTF(fp," -]");
/* sensep = true; */
} else if (this->comp == REV_ATAC_INTRON_COMP) {
- fprintf(fp," [-");
+ FPRINTF(fp," [-");
/* sensep = false; */
} else if (this->comp == NONINTRON_COMP) {
- fprintf(fp," ==");
+ FPRINTF(fp," ==");
/* sensep = true; */
} else {
- fprintf(fp," ##");
+ FPRINTF(fp," ##");
/* sensep = true; */
}
in_exon = false;
@@ -1975,28 +2032,28 @@ Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
}
if (i > 0) {
if (intron_end > intron_start) {
- fprintf(fp," ...%d...",intron_end - intron_start + 1);
+ FPRINTF(fp," ...%d...",intron_end - intron_start + 1);
} else {
- fprintf(fp," ...%d...",intron_start - intron_end + 1);
+ FPRINTF(fp," ...%d...",intron_start - intron_end + 1);
}
if (exon_querystart > exon_queryend + 1) {
- fprintf(fp," ***query_skip:%d***",exon_querystart-(exon_queryend+1));
+ FPRINTF(fp," ***query_skip:%d***",exon_querystart-(exon_queryend+1));
}
if (genome != NULL) {
if (cdna_direction >= 0) {
- fprintf(fp," %.3f, %.3f",
+ FPRINTF(fp," %.3f, %.3f",
donor_score(chroffset+exon_genomeend-1,chroffset,!watsonp,genome,chromosome_iit),
acceptor_score(chroffset+exon_genomestart-1,chroffset,!watsonp,genome,chromosome_iit));
} else {
- fprintf(fp," %.3f, %.3f",
+ FPRINTF(fp," %.3f, %.3f",
acceptor_score(chroffset+exon_genomeend-1,chroffset,watsonp,genome,chromosome_iit),
donor_score(chroffset+exon_genomestart-1,chroffset,watsonp,genome,chromosome_iit));
}
}
- putc('\n',fp);
+ PUTC('\n',fp);
}
num = den = 0;
in_exon = true;
@@ -2035,28 +2092,28 @@ Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
exon_queryend = last_querypos + ONEBASEDP;
exon_genomeend = last_genomepos + ONEBASEDP;
if (genomefirstp == true) {
- fprintf(fp," ");
+ FPRINTF(fp," ");
if (chrnum == 0) {
- fprintf(fp,"%u-%u",chroffset+exon_genomestart,chroffset+exon_genomeend);
+ FPRINTF(fp,"%u-%u",chroffset+exon_genomestart,chroffset+exon_genomeend);
} else {
- fprintf(fp,"%s:%d-%d",chrstring,exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"%s:%d-%d",chrstring,exon_genomestart,exon_genomeend);
}
- fprintf(fp," (%d-%d)",exon_querystart,exon_queryend);
+ FPRINTF(fp," (%d-%d)",exon_querystart,exon_queryend);
} else {
- fprintf(fp," %d-%d",exon_querystart,exon_queryend);
- fprintf(fp," ");
+ FPRINTF(fp," %d-%d",exon_querystart,exon_queryend);
+ FPRINTF(fp," ");
if (chrnum == 0) {
- fprintf(fp,"(%u-%u)",chroffset+exon_genomestart,chroffset+exon_genomeend);
+ FPRINTF(fp,"(%u-%u)",chroffset+exon_genomestart,chroffset+exon_genomeend);
} else {
- fprintf(fp,"(%s:%d-%d)",chrstring,exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"(%s:%d-%d)",chrstring,exon_genomestart,exon_genomeend);
}
}
if (den == 0) {
- fprintf(fp," %d%%",100);
+ FPRINTF(fp," %d%%",100);
} else {
- fprintf(fp," %d%%",(int) floor(100.0*(double) num/(double) den));
+ FPRINTF(fp," %d%%",(int) floor(100.0*(double) num/(double) den));
}
- fprintf(fp,"\n\n");
+ FPRINTF(fp,"\n\n");
if (chrstring != NULL) {
FREE(chrstring);
@@ -2068,14 +2125,14 @@ Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
return;
}
-static void
-tokens_free (List_T *tokens) {
+void
+Pair_tokens_free (List_T *tokens) {
List_T p;
char *token;
for (p = *tokens; p != NULL; p = List_next(p)) {
token = (char *) List_head(p);
- FREE(token);
+ FREE_OUT(token);
}
List_free(&(*tokens));
@@ -2083,10 +2140,28 @@ tokens_free (List_T *tokens) {
}
+List_T
+Pair_tokens_copy (List_T old) {
+ List_T new = NULL;
+ char *new_token, *old_token;
+
+ while (old != NULL) {
+ old_token = (char *) List_head(old);
+ new_token = (char *) MALLOC_OUT((strlen(old_token)+1) * sizeof(char));
+ strcpy(new_token,old_token);
+ new = List_push(new,(void *) new_token);
+ old = List_next(old);
+ }
+
+ return List_reverse(new);
+}
+
+
+
/* Tokens used by compressed and gff3 formats */
static void
-print_tokens_compressed (FILE *fp, List_T tokens) {
+print_tokens_compressed (Filestring_T fp, List_T tokens) {
List_T p;
int tokencount = 1;
char *token, *lasttoken = NULL;
@@ -2094,21 +2169,21 @@ print_tokens_compressed (FILE *fp, List_T tokens) {
for (p = tokens; p != NULL; p = List_next(p)) {
token = (char *) List_head(p);
if (lasttoken == NULL) {
- fprintf(fp,"\t%s",token);
+ FPRINTF(fp,"\t%s",token);
lasttoken = token;
} else if (!strcmp(token,lasttoken)) {
tokencount++;
} else {
if (tokencount > 1) {
- fprintf(fp,"!%d",tokencount);
+ FPRINTF(fp,"!%d",tokencount);
}
- fprintf(fp," %s",token);
+ FPRINTF(fp," %s",token);
lasttoken = token;
tokencount = 1;
}
}
if (tokencount > 1) {
- fprintf(fp,"!%d",tokencount);
+ FPRINTF(fp,"!%d",tokencount);
}
for (p = tokens; p != NULL; p = List_next(p)) {
@@ -2120,18 +2195,18 @@ print_tokens_compressed (FILE *fp, List_T tokens) {
}
static void
-print_tokens_gff3 (FILE *fp, List_T tokens) {
+print_tokens_gff3 (Filestring_T fp, List_T tokens) {
List_T p;
char *token;
if (tokens != NULL) {
p = tokens;
token = (char *) List_head(p);
- fprintf(fp,"%s",token);
+ FPRINTF(fp,"%s",token);
for (p = List_next(p); p != NULL; p = List_next(p)) {
token = (char *) List_head(p);
- fprintf(fp," %s",token);
+ FPRINTF(fp," %s",token);
}
}
@@ -2147,7 +2222,7 @@ static List_T
push_token (List_T tokens, char *token) {
char *copy;
- copy = (char *) CALLOC(strlen(token)+1,sizeof(char));
+ copy = (char *) MALLOC_OUT((strlen(token)+1) * sizeof(char));
strcpy(copy,token);
return List_push(tokens,(void *) copy);
}
@@ -2156,45 +2231,45 @@ push_token (List_T tokens, char *token) {
/* Definition of GFF3 format is at http://song.sourceforge.net/gff3.shtml */
static void
-print_gff3_gene (FILE *fp, int pathnum, char *sourcename, char *accession, char *chrstring, Chrpos_T start_genomepos,
+print_gff3_gene (Filestring_T fp, int pathnum, char *sourcename, char *accession, char *chrstring, Chrpos_T start_genomepos,
Chrpos_T end_genomepos, bool watsonp, int cdna_direction) {
- fprintf(fp,"%s\t",chrstring); /* 1: seqid */
- fprintf(fp,"%s\t",sourcename); /* 2: source */
- fprintf(fp,"gene\t"); /* 3: type */
+ FPRINTF(fp,"%s\t",chrstring); /* 1: seqid */
+ FPRINTF(fp,"%s\t",sourcename); /* 2: source */
+ FPRINTF(fp,"gene\t"); /* 3: type */
if (start_genomepos < end_genomepos) {
- fprintf(fp,"%u\t%u\t",start_genomepos,end_genomepos); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",start_genomepos,end_genomepos); /* 4,5: start, end */
} else {
- fprintf(fp,"%u\t%u\t",end_genomepos,start_genomepos); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",end_genomepos,start_genomepos); /* 4,5: start, end */
}
- fprintf(fp,".\t"); /* 6: score */
+ FPRINTF(fp,".\t"); /* 6: score */
if (watsonp == true) {
if (cdna_direction >= 0) {
- fprintf(fp,"+\t");
+ FPRINTF(fp,"+\t");
} else {
- fprintf(fp,"-\t");
+ FPRINTF(fp,"-\t");
}
} else {
if (cdna_direction >= 0) {
- fprintf(fp,"-\t"); /* 7: strand */
+ FPRINTF(fp,"-\t"); /* 7: strand */
} else {
- fprintf(fp,"+\t");
+ FPRINTF(fp,"+\t");
}
}
- fprintf(fp,".\t"); /* 8: phase */
+ FPRINTF(fp,".\t"); /* 8: phase */
/* 9: features */
- fprintf(fp,"ID=%s.path%d;Name=%s\n",accession,pathnum,accession);
+ FPRINTF(fp,"ID=%s.path%d;Name=%s\n",accession,pathnum,accession);
return;
}
static void
-print_gff3_mrna (FILE *fp, int pathnum, T start, T end,
+print_gff3_mrna (Filestring_T fp, int pathnum, T start, T end,
char *sourcename, char *accession, char *chrstring, Chrpos_T start_genomepos,
Chrpos_T end_genomepos, int querylength_given, int skiplength,
int matches, int mismatches, int qindels, int tindels, int unknowns,
@@ -2203,35 +2278,35 @@ print_gff3_mrna (FILE *fp, int pathnum, T start, T end,
int querypos1, querypos2;
double coverage, fracidentity;
- fprintf(fp,"%s\t",chrstring); /* 1: seqid */
- fprintf(fp,"%s\t",sourcename); /* 2: source */
- fprintf(fp,"mRNA\t"); /* 3: type */
+ FPRINTF(fp,"%s\t",chrstring); /* 1: seqid */
+ FPRINTF(fp,"%s\t",sourcename); /* 2: source */
+ FPRINTF(fp,"mRNA\t"); /* 3: type */
if (start_genomepos < end_genomepos) {
- fprintf(fp,"%u\t%u\t",start_genomepos,end_genomepos); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",start_genomepos,end_genomepos); /* 4,5: start, end */
} else {
- fprintf(fp,"%u\t%u\t",end_genomepos,start_genomepos); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",end_genomepos,start_genomepos); /* 4,5: start, end */
}
- fprintf(fp,".\t"); /* 6: score */
+ FPRINTF(fp,".\t"); /* 6: score */
if (watsonp == true) {
if (cdna_direction >= 0) {
- fprintf(fp,"+\t");
+ FPRINTF(fp,"+\t");
} else {
- fprintf(fp,"-\t");
+ FPRINTF(fp,"-\t");
}
} else {
if (cdna_direction >= 0) {
- fprintf(fp,"-\t"); /* 7: strand */
+ FPRINTF(fp,"-\t"); /* 7: strand */
} else {
- fprintf(fp,"+\t");
+ FPRINTF(fp,"+\t");
}
}
- fprintf(fp,".\t"); /* 8: phase */
+ FPRINTF(fp,".\t"); /* 8: phase */
/* 9: features */
- fprintf(fp,"ID=%s.mrna%d;Name=%s;Parent=%s.path%d;",
+ FPRINTF(fp,"ID=%s.mrna%d;Name=%s;Parent=%s.path%d;",
accession,pathnum,accession,accession,pathnum);
querypos1 = start->querypos;
@@ -2246,108 +2321,108 @@ print_gff3_mrna (FILE *fp, int pathnum, T start, T end,
#else
coverage = (double) (querypos2 - querypos1 + 1)/(double) (querylength_given + skiplength);
#endif
- fprintf(fp,"coverage=%.1f;",((double) rint(1000.0*coverage))/10.0);
+ FPRINTF(fp,"coverage=%.1f;",((double) rint(1000.0*coverage))/10.0);
if ((den = matches + mismatches + qindels + tindels) == 0) {
fracidentity = 1.0;
} else {
fracidentity = (double) matches/(double) den;
}
- fprintf(fp,"identity=%.1f;",((double) rint(1000.0*fracidentity))/10.0);
- fprintf(fp,"matches=%d;mismatches=%d;indels=%d;unknowns=%d",
+ FPRINTF(fp,"identity=%.1f;",((double) rint(1000.0*fracidentity))/10.0);
+ FPRINTF(fp,"matches=%d;mismatches=%d;indels=%d;unknowns=%d",
matches,mismatches,qindels+tindels,unknowns);
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
static void
-print_gff3_exon (FILE *fp, int exonno, int pathnum, char *sourcename, char *accession, char *chrstring,
+print_gff3_exon (Filestring_T fp, int exonno, int pathnum, char *sourcename, char *accession, char *chrstring,
int exon_genomestart, int exon_genomeend,
int exon_querystart, int exon_queryend, bool watsonp, int cdna_direction,
int pctidentity) {
- fprintf(fp,"%s\t",chrstring); /* 1: seqid */
- fprintf(fp,"%s\t",sourcename); /* 2: source */
- fprintf(fp,"exon\t"); /* 3: type */
+ FPRINTF(fp,"%s\t",chrstring); /* 1: seqid */
+ FPRINTF(fp,"%s\t",sourcename); /* 2: source */
+ FPRINTF(fp,"exon\t"); /* 3: type */
if (exon_genomestart < exon_genomeend) {
- fprintf(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
} else {
- fprintf(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
}
- fprintf(fp,"%d\t",pctidentity); /* 6: score */
+ FPRINTF(fp,"%d\t",pctidentity); /* 6: score */
if (watsonp == true) {
if (cdna_direction >= 0) {
- fprintf(fp,"+\t");
+ FPRINTF(fp,"+\t");
} else {
- fprintf(fp,"-\t");
+ FPRINTF(fp,"-\t");
}
} else {
if (cdna_direction >= 0) {
- fprintf(fp,"-\t"); /* 7: strand */
+ FPRINTF(fp,"-\t"); /* 7: strand */
} else {
- fprintf(fp,"+\t");
+ FPRINTF(fp,"+\t");
}
}
- fprintf(fp,".\t"); /* 8: phase */
+ FPRINTF(fp,".\t"); /* 8: phase */
/* 9: features */
- fprintf(fp,"ID=%s.mrna%d.exon%d;",accession,pathnum,exonno);
- fprintf(fp,"Name=%s;",accession);
- fprintf(fp,"Parent=%s.mrna%d;",accession,pathnum);
+ FPRINTF(fp,"ID=%s.mrna%d.exon%d;",accession,pathnum,exonno);
+ FPRINTF(fp,"Name=%s;",accession);
+ FPRINTF(fp,"Parent=%s.mrna%d;",accession,pathnum);
if (cdna_direction >= 0) {
- fprintf(fp,"Target=%s %d %d +\n",accession,exon_querystart,exon_queryend);
+ FPRINTF(fp,"Target=%s %d %d +\n",accession,exon_querystart,exon_queryend);
} else {
- fprintf(fp,"Target=%s %d %d -\n",accession,exon_queryend,exon_querystart);
+ FPRINTF(fp,"Target=%s %d %d -\n",accession,exon_queryend,exon_querystart);
}
return;
}
static void
-print_gff3_cds (FILE *fp, int cdsno, int pathnum, char *sourcename, char *accession, char *chrstring,
+print_gff3_cds (Filestring_T fp, int cdsno, int pathnum, char *sourcename, char *accession, char *chrstring,
int cds_genomestart, int cds_genomeend,
int cds_querystart, int cds_queryend, bool watsonp, int cdna_direction,
int pctidentity, int cds_phase) {
- fprintf(fp,"%s\t",chrstring); /* 1: seqid */
- fprintf(fp,"%s\t",sourcename); /* 2: source */
- fprintf(fp,"CDS\t"); /* 3: type */
+ FPRINTF(fp,"%s\t",chrstring); /* 1: seqid */
+ FPRINTF(fp,"%s\t",sourcename); /* 2: source */
+ FPRINTF(fp,"CDS\t"); /* 3: type */
if (cds_genomestart < cds_genomeend) {
- fprintf(fp,"%u\t%u\t",cds_genomestart,cds_genomeend); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",cds_genomestart,cds_genomeend); /* 4,5: start, end */
} else {
- fprintf(fp,"%u\t%u\t",cds_genomeend,cds_genomestart); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",cds_genomeend,cds_genomestart); /* 4,5: start, end */
}
- fprintf(fp,"%d\t",pctidentity); /* 6: score */
+ FPRINTF(fp,"%d\t",pctidentity); /* 6: score */
if (watsonp == true) {
if (cdna_direction >= 0) {
- fprintf(fp,"+\t");
+ FPRINTF(fp,"+\t");
} else {
- fprintf(fp,"-\t");
+ FPRINTF(fp,"-\t");
}
} else {
if (cdna_direction >= 0) {
- fprintf(fp,"-\t"); /* 7: strand */
+ FPRINTF(fp,"-\t"); /* 7: strand */
} else {
- fprintf(fp,"+\t");
+ FPRINTF(fp,"+\t");
}
}
- fprintf(fp,"%d\t",cds_phase); /* 8: phase */
+ FPRINTF(fp,"%d\t",cds_phase); /* 8: phase */
/* 9: features */
- fprintf(fp,"ID=%s.mrna%d.cds%d;",accession,pathnum,cdsno);
- fprintf(fp,"Name=%s;",accession);
- fprintf(fp,"Parent=%s.mrna%d;",accession,pathnum);
+ FPRINTF(fp,"ID=%s.mrna%d.cds%d;",accession,pathnum,cdsno);
+ FPRINTF(fp,"Name=%s;",accession);
+ FPRINTF(fp,"Parent=%s.mrna%d;",accession,pathnum);
if (cdna_direction >= 0) {
- fprintf(fp,"Target=%s %d %d +\n",accession,cds_querystart,cds_queryend);
+ FPRINTF(fp,"Target=%s %d %d +\n",accession,cds_querystart,cds_queryend);
} else {
- fprintf(fp,"Target=%s %d %d -\n",accession,cds_queryend,cds_querystart);
+ FPRINTF(fp,"Target=%s %d %d -\n",accession,cds_queryend,cds_querystart);
}
return;
@@ -2355,36 +2430,36 @@ print_gff3_cds (FILE *fp, int cdsno, int pathnum, char *sourcename, char *access
static void
-print_gff3_cdna_match (FILE *fp, int pathnum, char *sourcename, char *accession, char *chrstring,
+print_gff3_cdna_match (Filestring_T fp, int pathnum, char *sourcename, char *accession, char *chrstring,
int exon_genomestart, int exon_genomeend,
int exon_querystart, int exon_queryend, bool watsonp,
int pctidentity, List_T tokens) {
- fprintf(fp,"%s\t",chrstring); /* 1: seqid */
- fprintf(fp,"%s\t",sourcename); /* 2: source */
- fprintf(fp,"cDNA_match\t"); /* 3: type */
+ FPRINTF(fp,"%s\t",chrstring); /* 1: seqid */
+ FPRINTF(fp,"%s\t",sourcename); /* 2: source */
+ FPRINTF(fp,"cDNA_match\t"); /* 3: type */
if (exon_genomestart < exon_genomeend) {
- fprintf(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
} else {
- fprintf(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
}
- fprintf(fp,"%d\t",pctidentity); /* 6: score */
+ FPRINTF(fp,"%d\t",pctidentity); /* 6: score */
/* 7: strand */
if (watsonp == true) {
- fprintf(fp,"+\t");
+ FPRINTF(fp,"+\t");
} else {
- fprintf(fp,"-\t");
+ FPRINTF(fp,"-\t");
}
- fprintf(fp,".\t"); /* 8: phase */
+ FPRINTF(fp,".\t"); /* 8: phase */
/* 9: features */
- fprintf(fp,"ID=%s.path%d;",accession,pathnum);
- fprintf(fp,"Name=%s;",accession);
- fprintf(fp,"Target=%s %d %d;Gap=",accession,exon_querystart,exon_queryend);
+ FPRINTF(fp,"ID=%s.path%d;",accession,pathnum);
+ FPRINTF(fp,"Name=%s;",accession);
+ FPRINTF(fp,"Target=%s %d %d;Gap=",accession,exon_querystart,exon_queryend);
print_tokens_gff3(fp,tokens);
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
@@ -2402,7 +2477,7 @@ strand_char (int strand) {
static void
-print_gff3_est_match (FILE *fp, int pathnum, T start, T end,
+print_gff3_est_match (Filestring_T fp, int pathnum, T start, T end,
char *sourcename, char *accession, char *chrstring,
int exon_genomestart, int exon_genomeend,
int exon_querystart, int exon_queryend,
@@ -2413,27 +2488,27 @@ print_gff3_est_match (FILE *fp, int pathnum, T start, T end,
int den;
int querypos1, querypos2;
- fprintf(fp,"%s\t",chrstring); /* 1: seqid */
- fprintf(fp,"%s\t",sourcename); /* 2: source */
- fprintf(fp,"EST_match\t"); /* 3: type */
+ FPRINTF(fp,"%s\t",chrstring); /* 1: seqid */
+ FPRINTF(fp,"%s\t",sourcename); /* 2: source */
+ FPRINTF(fp,"EST_match\t"); /* 3: type */
if (exon_genomestart < exon_genomeend) {
- fprintf(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
} else {
- fprintf(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
+ FPRINTF(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
}
- fprintf(fp,"%d\t",pctidentity); /* 6: score */
+ FPRINTF(fp,"%d\t",pctidentity); /* 6: score */
/* 7: strand */
feature_strand = watsonp ? cdna_direction : -cdna_direction;
- fprintf(fp,"%c\t",strand_char(feature_strand));
+ FPRINTF(fp,"%c\t",strand_char(feature_strand));
- fprintf(fp,".\t"); /* 8: phase */
+ FPRINTF(fp,".\t"); /* 8: phase */
/* 9: features */
- fprintf(fp,"ID=%s.path%d;",accession,pathnum);
- fprintf(fp,"Name=%s;",accession);
+ FPRINTF(fp,"ID=%s.path%d;",accession,pathnum);
+ FPRINTF(fp,"Name=%s;",accession);
target_strand = cdna_direction != 0 ? cdna_direction : (watsonp ? 1 : -1);
- fprintf(fp,"Target=%s %d %d %c;Gap=",accession,exon_querystart,exon_queryend,
+ FPRINTF(fp,"Target=%s %d %d %c;Gap=",accession,exon_querystart,exon_queryend,
strand_char(target_strand));
print_tokens_gff3(fp,tokens);
@@ -2449,23 +2524,23 @@ print_gff3_est_match (FILE *fp, int pathnum, T start, T end,
#else
coverage = (double) (querypos2 - querypos1 + 1)/(double) (querylength_given + skiplength);
#endif
- fprintf(fp,";coverage=%.1f",((double) rint(1000.0*coverage))/10.0);
+ FPRINTF(fp,";coverage=%.1f",((double) rint(1000.0*coverage))/10.0);
if ((den = matches + mismatches + qindels + tindels) == 0) {
fracidentity = 1.0;
} else {
fracidentity = (double) matches/(double) den;
}
- fprintf(fp,";identity=%.1f",((double) rint(1000.0*fracidentity))/10.0);
- fprintf(fp,";matches=%d;mismatches=%d;indels=%d;unknowns=%d",
+ FPRINTF(fp,";identity=%.1f",((double) rint(1000.0*fracidentity))/10.0);
+ FPRINTF(fp,";matches=%d;mismatches=%d;indels=%d;unknowns=%d",
matches,mismatches,qindels+tindels,unknowns);
- putc('\n',fp);
+ PUTC('\n',fp);
}
static void
-print_gff3_exons_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, T start, T end,
+print_gff3_exons_forward (Filestring_T fp, struct T *pairs, int npairs, int pathnum, T start, T end,
char *sourcename, char *accession, char *chrstring,
int querylength_given, int skiplength, int matches, int mismatches,
int qindels, int tindels, int unknowns, bool watsonp, int cdna_direction,
@@ -2555,7 +2630,7 @@ print_gff3_exons_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, T
printf_gff3_intron(++intronno,pathnum,sourcename,accession,chrstring,?,?,intron_start,intron_end,watsonp);
#endif
}
- putc('\n',fp);
+ PUTC('\n',fp);
}
num = den = 0;
@@ -2694,7 +2769,7 @@ print_gff3_exons_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, T
}
static void
-print_gff3_exons_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, char *sourcename, char *accession, char *chrstring,
+print_gff3_exons_backward (Filestring_T fp, struct T *pairs, int npairs, int pathnum, char *sourcename, char *accession, char *chrstring,
bool watsonp, int cdna_direction, bool gff_introns_p) {
bool in_exon = false;
struct T *ptr, *this = NULL;
@@ -2744,7 +2819,7 @@ print_gff3_exons_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, c
printf_gff3_intron(++intronno,pathnum,sourcename,accession,chrstring,?,?,intron_start,intron_end,watsonp);
#endif
}
- putc('\n',fp);
+ PUTC('\n',fp);
}
num = den = 0;
@@ -2797,7 +2872,7 @@ print_gff3_exons_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, c
static void
-print_gff3_cdss_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, char *sourcename, char *accession, char *chrstring,
+print_gff3_cdss_forward (Filestring_T fp, struct T *pairs, int npairs, int pathnum, char *sourcename, char *accession, char *chrstring,
bool watsonp, int cdna_direction) {
bool in_cds = false;
struct T *ptr, *this = NULL;
@@ -2896,7 +2971,7 @@ print_gff3_cdss_forward (FILE *fp, struct T *pairs, int npairs, int pathnum, cha
}
static void
-print_gff3_cdss_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, char *sourcename, char *accession, char *chrstring,
+print_gff3_cdss_backward (Filestring_T fp, struct T *pairs, int npairs, int pathnum, char *sourcename, char *accession, char *chrstring,
bool watsonp, int cdna_direction) {
bool in_cds = false;
struct T *ptr, *this = NULL;
@@ -2998,7 +3073,7 @@ print_gff3_cdss_backward (FILE *fp, struct T *pairs, int npairs, int pathnum, ch
void
-Pair_print_gff3 (FILE *fp, struct T *pairs, int npairs, int pathnum, char *accession,
+Pair_print_gff3 (Filestring_T fp, struct T *pairs, int npairs, int pathnum, char *accession,
T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
int translation_end,
int querylength_given, int skiplength, int matches, int mismatches,
@@ -3052,7 +3127,7 @@ Pair_print_gff3 (FILE *fp, struct T *pairs, int npairs, int pathnum, char *acces
}
if (gff3_separators_p == true) {
- fprintf(fp,"###\n"); /* Terminates alignment */
+ FPRINTF(fp,"###\n"); /* Terminates alignment */
}
if (chrnum != 0) {
@@ -3110,7 +3185,7 @@ Pair_circularpos (int *alias, struct T *pairs, int npairs, Chrpos_T chrlength, b
} else {
/* Some of read is in circular proper and some is in circular alias */
*alias = 0;
- return (querylength - ptr->querypos - 1);
+ return (querylength - ptr->querypos/*- 1*/);
}
}
}
@@ -3124,7 +3199,7 @@ Pair_circularpos (int *alias, struct T *pairs, int npairs, Chrpos_T chrlength, b
/* Based on procedure in substring.c */
static void
-print_splicesite_labels (FILE *fp, Chrnum_T chrnum, Chrpos_T splicesitepos,
+print_splicesite_labels (Filestring_T fp, Chrnum_T chrnum, Chrpos_T splicesitepos,
char *tag, IIT_T splicesites_iit, int *splicesites_divint_crosstable,
int typeint) {
int *splicesites, nsplicesites, i;
@@ -3144,14 +3219,14 @@ print_splicesite_labels (FILE *fp, Chrnum_T chrnum, Chrpos_T splicesitepos,
chrnum,splicesitepos,splicesitepos+1U,typeint);
#endif
} else {
- fprintf(fp,",%s:",tag);
+ FPRINTF(fp,",%s:",tag);
label = IIT_label(splicesites_iit,splicesites[0],&allocp);
- fprintf(fp,"%s",label);
+ FPRINTF(fp,"%s",label);
if (allocp) FREE(label);
for (i = 1; i < nsplicesites; i++) {
label = IIT_label(splicesites_iit,splicesites[i],&allocp);
- fprintf(fp,"|%s",label);
+ FPRINTF(fp,"|%s",label);
if (allocp) FREE(label);
}
FREE(splicesites);
@@ -3162,7 +3237,7 @@ print_splicesite_labels (FILE *fp, Chrnum_T chrnum, Chrpos_T splicesitepos,
}
static void
-print_endtypes (FILE *fp,
+print_endtypes (Filestring_T fp,
Endtype_T endtype1, int ntrim1, int nindels1, Chrpos_T prev_splice_dist,
Endtype_T endtype2, int ntrim2, int nindels2, Chrpos_T splice_dist,
int nmatches, int nmismatches_refdiff, int nmismatches_bothdiff,
@@ -3176,11 +3251,11 @@ print_endtypes (FILE *fp,
int typeint1, typeint2;
if (endtype1 == END) {
- fprintf(fp,"start:%d",ntrim1);
+ FPRINTF(fp,"start:%d",ntrim1);
} else if (endtype1 == INS) {
- fprintf(fp,"ins:%d",nindels1);
+ FPRINTF(fp,"ins:%d",nindels1);
} else if (endtype1 == DEL) {
- fprintf(fp,"del:%d",nindels1);
+ FPRINTF(fp,"del:%d",nindels1);
} else if (endtype1 == DON || endtype1 == AMB_DON) {
typeint1 = donor_typeint;
if (watsonp == true) {
@@ -3202,7 +3277,7 @@ print_endtypes (FILE *fp,
abort();
}
}
- fprintf(fp,"donor:%.2f",prob);
+ FPRINTF(fp,"donor:%.2f",prob);
} else if (endtype1 == ACC || endtype1 == AMB_ACC) {
typeint1 = acceptor_typeint;
if (watsonp == true) {
@@ -3224,19 +3299,19 @@ print_endtypes (FILE *fp,
abort();
}
}
- fprintf(fp,"acceptor:%.2f",prob);
+ FPRINTF(fp,"acceptor:%.2f",prob);
} else {
- fprintf(fp,"unknown");
+ FPRINTF(fp,"unknown");
}
- fprintf(fp,"..");
+ FPRINTF(fp,"..");
if (endtype2 == END) {
- fprintf(fp,"end:%d",ntrim2);
+ FPRINTF(fp,"end:%d",ntrim2);
} else if (endtype2 == INS) {
- fprintf(fp,"ins:%d",nindels2);
+ FPRINTF(fp,"ins:%d",nindels2);
} else if (endtype2 == DEL) {
- fprintf(fp,"del:%d",nindels2);
+ FPRINTF(fp,"del:%d",nindels2);
} else if (endtype2 == DON || endtype2 == AMB_DON) {
typeint2 = donor_typeint;
if (watsonp == true) {
@@ -3258,7 +3333,7 @@ print_endtypes (FILE *fp,
abort();
}
}
- fprintf(fp,"donor:%.2f",prob);
+ FPRINTF(fp,"donor:%.2f",prob);
} else if (endtype2 == ACC || endtype2 == AMB_ACC) {
typeint2 = acceptor_typeint;
if (watsonp == true) {
@@ -3280,22 +3355,24 @@ print_endtypes (FILE *fp,
abort();
}
}
- fprintf(fp,"acceptor:%.2f",prob);
+ FPRINTF(fp,"acceptor:%.2f",prob);
} else {
- fprintf(fp,"unknown");
+ FPRINTF(fp,"unknown");
}
- fprintf(fp,",matches:%d,sub:%d",nmatches,nmismatches_bothdiff);
- fprintf(fp,"+%d=%d",nmismatches_refdiff - nmismatches_bothdiff,nmismatches_refdiff);
+ FPRINTF(fp,",matches:%d,sub:%d",nmatches,nmismatches_bothdiff);
+ if (print_nsnpdiffs_p) {
+ FPRINTF(fp,"+%d=%d",nmismatches_refdiff - nmismatches_bothdiff,nmismatches_refdiff);
+ }
if (prev_splice_dist != 0 && splice_dist != 0) {
/* Double introns */
if (cdna_direction > 0) {
- fprintf(fp,",dir:sense,splice_type:consistent");
+ FPRINTF(fp,",dir:sense,splice_type:consistent");
} else {
- fprintf(fp,",dir:antisense,splice_type:consistent");
+ FPRINTF(fp,",dir:antisense,splice_type:consistent");
}
- fprintf(fp,",splice_dist_1:%u,splice_dist_2:%u",prev_splice_dist,splice_dist);
+ FPRINTF(fp,",splice_dist_1:%u,splice_dist_2:%u",prev_splice_dist,splice_dist);
print_splicesite_labels(fp,chrnum,prev_splicesitepos,"label_1",splicesites_iit,
splicesites_divint_crosstable,typeint1);
print_splicesite_labels(fp,chrnum,splicesitepos,"label_2",splicesites_iit,
@@ -3304,11 +3381,11 @@ print_endtypes (FILE *fp,
} else if (prev_splice_dist != 0) {
/* Prev intron */
if (cdna_direction > 0) {
- fprintf(fp,",dir:sense,splice_type:consistent");
+ FPRINTF(fp,",dir:sense,splice_type:consistent");
} else {
- fprintf(fp,",dir:antisense,splice_type:consistent");
+ FPRINTF(fp,",dir:antisense,splice_type:consistent");
}
- fprintf(fp,",splice_dist_1:%u",prev_splice_dist);
+ FPRINTF(fp,",splice_dist_1:%u",prev_splice_dist);
print_splicesite_labels(fp,chrnum,prev_splicesitepos,"label_1",splicesites_iit,
splicesites_divint_crosstable,typeint1);
if (endtype2 == AMB_DON || endtype2 == AMB_ACC) {
@@ -3319,11 +3396,11 @@ print_endtypes (FILE *fp,
} else if (splice_dist != 0) {
/* Next intron */
if (cdna_direction > 0) {
- fprintf(fp,",dir:sense,splice_type:consistent");
+ FPRINTF(fp,",dir:sense,splice_type:consistent");
} else {
- fprintf(fp,",dir:antisense,splice_type:consistent");
+ FPRINTF(fp,",dir:antisense,splice_type:consistent");
}
- fprintf(fp,",splice_dist_2:%u",splice_dist);
+ FPRINTF(fp,",splice_dist_2:%u",splice_dist);
if (endtype1 == AMB_DON || endtype1 == AMB_ACC) {
print_splicesite_labels(fp,chrnum,splicesitepos,"label_1",splicesites_iit,
splicesites_divint_crosstable,typeint1);
@@ -3350,15 +3427,15 @@ print_endtypes (FILE *fp,
/* Based on print_pair_info in stage3hr.c */
static void
-print_pair_info (FILE *fp, int insertlength, int pairscore, Pairtype_T pairtype) {
- fprintf(fp,"pair_score:%d",pairscore);
- fprintf(fp,",insert_length:%d",insertlength);
+print_pair_info (Filestring_T fp, int insertlength, int pairscore, Pairtype_T pairtype) {
+ FPRINTF(fp,"pair_score:%d",pairscore);
+ FPRINTF(fp,",insert_length:%d",insertlength);
switch (pairtype) {
case CONCORDANT: break;
- case PAIRED_SCRAMBLE: fprintf(fp,",pairtype:scramble"); break;
- case PAIRED_INVERSION: fprintf(fp,",pairtype:inversion"); break;
- case PAIRED_TOOLONG: fprintf(fp,",pairtype:toolong"); break;
+ case PAIRED_SCRAMBLE: FPRINTF(fp,",pairtype:scramble"); break;
+ case PAIRED_INVERSION: FPRINTF(fp,",pairtype:inversion"); break;
+ case PAIRED_TOOLONG: FPRINTF(fp,",pairtype:toolong"); break;
case CONCORDANT_TRANSLOCATIONS: break;
case CONCORDANT_TERMINAL: break;
case PAIRED_UNSPECIFIED: abort();
@@ -3372,13 +3449,14 @@ print_pair_info (FILE *fp, int insertlength, int pairscore, Pairtype_T pairtype)
void
-Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments, bool invertedp,
+Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nsegments, bool invertedp,
Endtype_T start_endtype, Endtype_T end_endtype,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
int querylength, bool watsonp, int cdna_direction, int score,
int insertlength, int pairscore, int mapq_score,
Univ_IIT_T chromosome_iit, IIT_T splicesites_iit,
- int *splicesites_divint_crosstable, int donor_typeint, int acceptor_typeint) {
+ int *splicesites_divint_crosstable, int donor_typeint, int acceptor_typeint,
+ bool pairedp, GMAP_source_T gmap_source) {
bool in_exon = true;
struct T *pairs, *ptr, *ptr0, *this = NULL;
int exon_querystart = -1, exon_queryend;
@@ -3410,7 +3488,7 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
strand = '-';
}
- fprintf(fp," "); /* Beginning of GSNAP line */
+ FPRINTF(fp," "); /* Beginning of GSNAP line */
ptr = pairs;
exon_querystart = ptr->querypos + 1;
@@ -3424,19 +3502,19 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
if (watsonp == true) {
if (ntrim_start >= (exon_genomestart - 1)) {
for (querypos = 0; querypos < ntrim_start - exon_genomestart + 1; querypos++) {
- fprintf(fp,"*");
+ FPRINTF(fp,"*");
}
pos = chroffset;
for ( ; querypos < ntrim_start; querypos++) {
c = Genome_get_char_blocks(&c_alt,pos++);
- fprintf(fp,"%c",tolower(c));
+ FPRINTF(fp,"%c",tolower(c));
}
} else {
pos = chroffset + (exon_genomestart - 1) - ntrim_start;
for (querypos = 0; querypos < ntrim_start; querypos++) {
c = Genome_get_char_blocks(&c_alt,pos++);
- fprintf(fp,"%c",tolower(c));
+ FPRINTF(fp,"%c",tolower(c));
}
}
@@ -3444,18 +3522,18 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
if ((pos = chroffset + (exon_genomestart - 1) + ntrim_start) >= chrhigh) {
assert(ntrim_start - (int) (chrhigh - chroffset - exon_genomestart + 1) < querylength);
for (querypos = 0; querypos <= ntrim_start - (int) (chrhigh - chroffset - exon_genomestart + 1); querypos++) {
- fprintf(fp,"*");
+ FPRINTF(fp,"*");
}
pos = chrhigh - 1;
for ( ; querypos < ntrim_start; querypos++) {
c = Genome_get_char_blocks(&c_alt,pos--);
- fprintf(fp,"%c",tolower(complCode[(int) c]));
+ FPRINTF(fp,"%c",tolower(complCode[(int) c]));
}
} else {
for (querypos = 0; querypos < ntrim_start; querypos++) {
c = Genome_get_char_blocks(&c_alt,pos--);
- fprintf(fp,"%c",tolower(complCode[(int) c]));
+ FPRINTF(fp,"%c",tolower(complCode[(int) c]));
}
}
}
@@ -3507,15 +3585,15 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
}
- fprintf(fp,"%c",tolower(ptr[-1].genome)); /* dinucleotide */
- fprintf(fp,"%c",tolower(ptr[0].genome));
+ FPRINTF(fp,"%c",tolower(ptr[-1].genome)); /* dinucleotide */
+ FPRINTF(fp,"%c",tolower(ptr[0].genome));
for (querypos = exon_queryend+2; querypos < querylength; querypos++) {
- fprintf(fp,"-");
+ FPRINTF(fp,"-");
}
- fprintf(fp,"\t%d..%d",exon_querystart,exon_queryend);
- fprintf(fp,"\t%c%s:%u..%u",strand,chr,exon_genomestart,exon_genomeend);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t%d..%d",exon_querystart,exon_queryend);
+ FPRINTF(fp,"\t%c%s:%u..%u",strand,chr,exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"\t");
print_endtypes(fp,prev_endtype,ntrim_start,prev_nindels,prev_splice_dist,
endtype,ntrim_end,/*nindels*/0,splice_dist,
nmatches,nmismatches_refdiff,nmismatches_bothdiff,chrnum,chroffset,
@@ -3524,15 +3602,21 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
donor_typeint,acceptor_typeint);
if (firstp == true) {
- fprintf(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
- fprintf(fp,",method:gmap");
- fprintf(fp,"\t");
- print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
+ FPRINTF(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
+ switch (gmap_source) {
+ case GMAP_VIA_SUBSTRINGS: FPRINTF(fp,",method:gmap_via_substrings"); break;
+ case GMAP_VIA_SEGMENTS: FPRINTF(fp,",method:gmap_via_segments"); break;
+ case GMAP_VIA_REGION: FPRINTF(fp,",method:gmap_via_region"); break;
+ }
+ if (pairedp == true) {
+ FPRINTF(fp,"\t");
+ print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
+ }
firstp = false;
}
nmismatches_refdiff = nmismatches_bothdiff = nmatches = 0;
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
in_exon = false;
}
@@ -3547,12 +3631,12 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
exon_querystart = this->querypos + 1;
exon_genomestart = this->genomepos + 1;
- fprintf(fp,",");
+ FPRINTF(fp,",");
for (querypos = 0; querypos < this->querypos - 2; querypos++) {
- fprintf(fp,"-");
+ FPRINTF(fp,"-");
}
- fprintf(fp,"%c",tolower(ptr[-3].genome)); /* dinucleotide */
- fprintf(fp,"%c",tolower(ptr[-2].genome));
+ FPRINTF(fp,"%c",tolower(ptr[-3].genome)); /* dinucleotide */
+ FPRINTF(fp,"%c",tolower(ptr[-2].genome));
in_exon = true;
}
@@ -3589,11 +3673,11 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
/* Finish rest of this line */
for (querypos = exon_queryend; querypos < querylength; querypos++) {
- fprintf(fp,"-");
+ FPRINTF(fp,"-");
}
- fprintf(fp,"\t%d..%d",exon_querystart,exon_queryend);
- fprintf(fp,"\t%c%s:%u..%u",strand,chr,exon_genomestart,exon_genomeend);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t%d..%d",exon_querystart,exon_queryend);
+ FPRINTF(fp,"\t%c%s:%u..%u",strand,chr,exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"\t");
print_endtypes(fp,prev_endtype,ntrim_start,prev_nindels,prev_splice_dist,
endtype,ntrim_end,nindels,/*splice_dist*/0U,
nmatches,nmismatches_refdiff,nmismatches_bothdiff,chrnum,chroffset,
@@ -3602,14 +3686,20 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
donor_typeint,acceptor_typeint);
if (firstp == true) {
- fprintf(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
- fprintf(fp,",method:gmap");
- fprintf(fp,"\t");
- print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
+ FPRINTF(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
+ switch (gmap_source) {
+ case GMAP_VIA_SUBSTRINGS: FPRINTF(fp,",method:gmap_via_substrings"); break;
+ case GMAP_VIA_SEGMENTS: FPRINTF(fp,",method:gmap_via_segments"); break;
+ case GMAP_VIA_REGION: FPRINTF(fp,",method:gmap_via_region"); break;
+ }
+ if (pairedp == true) {
+ FPRINTF(fp,"\t");
+ print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
+ }
firstp = false;
}
- fprintf(fp,"\n,");
+ FPRINTF(fp,"\n,");
this = ptr;
exon_querystart = this->querypos + 1;
@@ -3618,7 +3708,7 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
/* Start of next line */
for (querypos = 1; querypos < exon_querystart; querypos++) {
- fprintf(fp,"-");
+ FPRINTF(fp,"-");
}
} else if (this->cdna == ' ') {
@@ -3644,7 +3734,7 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
/* indel_pos = this->querypos; */
nindels = 0;
while (i < npairs && this->gapp == false && this->cdna == ' ') {
- fprintf(fp,"%c",tolower(this->genome));
+ FPRINTF(fp,"%c",tolower(this->genome));
nindels++;
this = ptr++;
i++;
@@ -3654,11 +3744,11 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
/* Finish rest of this line */
for (querypos = exon_queryend + nindels; querypos < querylength; querypos++) {
- fprintf(fp,"-");
+ FPRINTF(fp,"-");
}
- fprintf(fp,"\t%d..%d",exon_querystart,exon_queryend);
- fprintf(fp,"\t%c%s:%u..%u",strand,chr,exon_genomestart,exon_genomeend);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t%d..%d",exon_querystart,exon_queryend);
+ FPRINTF(fp,"\t%c%s:%u..%u",strand,chr,exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"\t");
print_endtypes(fp,prev_endtype,ntrim_start,prev_nindels,prev_splice_dist,
endtype,ntrim_end,nindels,/*splice_dist*/0U,
nmatches,nmismatches_refdiff,nmismatches_bothdiff,chrnum,chroffset,
@@ -3667,14 +3757,20 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
donor_typeint,acceptor_typeint);
if (firstp == true) {
- fprintf(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
- fprintf(fp,",method:gmap");
- fprintf(fp,"\t");
- print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
+ FPRINTF(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
+ switch (gmap_source) {
+ case GMAP_VIA_SUBSTRINGS: FPRINTF(fp,",method:gmap_via_substrings"); break;
+ case GMAP_VIA_SEGMENTS: FPRINTF(fp,",method:gmap_via_segments"); break;
+ case GMAP_VIA_REGION: FPRINTF(fp,",method:gmap_via_region"); break;
+ }
+ if (pairedp == true) {
+ FPRINTF(fp,"\t");
+ print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
+ }
firstp = false;
}
- fprintf(fp,"\n,");
+ FPRINTF(fp,"\n,");
this = ptr;
exon_querystart = this->querypos + 1;
@@ -3683,7 +3779,7 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
/* Start of next line */
for (querypos = 1; querypos < exon_querystart; querypos++) {
- fprintf(fp,"-");
+ FPRINTF(fp,"-");
}
} else {
@@ -3694,13 +3790,13 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
} else {
c = this->genome;
if (this->genome == this->cdna) {
- fprintf(fp,"%c",c);
+ FPRINTF(fp,"%c",c);
nmatches++;
} else if (this->genomealt == this->cdna) {
- fprintf(fp,"%c",c);
+ FPRINTF(fp,"%c",c);
nmismatches_refdiff++;
} else {
- fprintf(fp,"%c",tolower(c));
+ FPRINTF(fp,"%c",tolower(c));
nmismatches_bothdiff++;
nmismatches_refdiff++;
}
@@ -3743,16 +3839,16 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
assert((int) (chrhigh - chroffset - exon_genomeend) < querylength);
for (i = 0; i < (int) (chrhigh - chroffset - exon_genomeend); i++) {
c = Genome_get_char_blocks(&c_alt,++pos);
- fprintf(fp,"%c",tolower(c));
+ FPRINTF(fp,"%c",tolower(c));
}
for ( ; i < ntrim_end; i++) {
- fprintf(fp,"*");
+ FPRINTF(fp,"*");
}
} else {
for (i = 0; i < ntrim_end; i++) {
c = Genome_get_char_blocks(&c_alt,++pos);
- fprintf(fp,"%c",tolower(c));
+ FPRINTF(fp,"%c",tolower(c));
}
}
@@ -3761,23 +3857,23 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
if (ntrim_end >= (exon_genomeend - 1)) {
for (i = 0; i < exon_genomeend - 1; i++) {
c = Genome_get_char_blocks(&c_alt,--pos);
- fprintf(fp,"%c",tolower(complCode[(int) c]));
+ FPRINTF(fp,"%c",tolower(complCode[(int) c]));
}
for ( ; i < ntrim_end; i++) {
- fprintf(fp,"*");
+ FPRINTF(fp,"*");
}
} else {
for (i = 0; i < ntrim_end; i++) {
c = Genome_get_char_blocks(&c_alt,--pos);
- fprintf(fp,"%c",tolower(complCode[(int) c]));
+ FPRINTF(fp,"%c",tolower(complCode[(int) c]));
}
}
}
- fprintf(fp,"\t%d..%d",exon_querystart,exon_queryend);
- fprintf(fp,"\t%c%s:%u..%u",strand,chr,exon_genomestart,exon_genomeend);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t%d..%d",exon_querystart,exon_queryend);
+ FPRINTF(fp,"\t%c%s:%u..%u",strand,chr,exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"\t");
print_endtypes(fp,prev_endtype,ntrim_start,prev_nindels,prev_splice_dist,
/*endtype2*/end_endtype,ntrim_end,/*nindels*/0,/*splice_dist*/0U,
nmatches,nmismatches_refdiff,nmismatches_bothdiff,chrnum,chroffset,
@@ -3786,14 +3882,20 @@ Pair_print_gsnap (FILE *fp, struct T *pairs_querydir, int npairs, int nsegments,
donor_typeint,acceptor_typeint);
if (firstp == true) {
- fprintf(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
- fprintf(fp,",method:gmap");
- fprintf(fp,"\t");
- print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
+ FPRINTF(fp,"\tsegs:%d,align_score:%d,mapq:%d",nsegments,score,mapq_score);
+ switch (gmap_source) {
+ case GMAP_VIA_SUBSTRINGS: FPRINTF(fp,",method:gmap_via_substrings"); break;
+ case GMAP_VIA_SEGMENTS: FPRINTF(fp,",method:gmap_via_segments"); break;
+ case GMAP_VIA_REGION: FPRINTF(fp,",method:gmap_via_region"); break;
+ }
+ if (pairedp == true) {
+ FPRINTF(fp,"\t");
+ print_pair_info(fp,insertlength,pairscore,/*pairtype*/CONCORDANT);
+ }
firstp = false;
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
if (allocp) {
FREE(chr);
@@ -3835,48 +3937,48 @@ blast_bitscore (int alignlength, int nmismatches) {
static void
-print_m8_line (FILE *fp, int exon_querystart, int exon_queryend,
+print_m8_line (Filestring_T fp, int exon_querystart, int exon_queryend,
char *chr, Chrpos_T exon_genomestart, Chrpos_T exon_genomeend,
int nmismatches_bothdiff, Shortread_T headerseq, char *acc_suffix) {
double identity;
int alignlength_trim;
- fprintf(fp,"%s%s",Shortread_accession(headerseq),acc_suffix); /* field 0: accession */
+ FPRINTF(fp,"%s%s",Shortread_accession(headerseq),acc_suffix); /* field 0: accession */
- fprintf(fp,"\t%s",chr); /* field 1: chr */
+ FPRINTF(fp,"\t%s",chr); /* field 1: chr */
/* field 2: identity */
alignlength_trim = exon_queryend - exon_querystart;
identity = (double) (alignlength_trim - nmismatches_bothdiff)/(double) alignlength_trim;
- fprintf(fp,"\t%.1f",100.0*identity);
+ FPRINTF(fp,"\t%.1f",100.0*identity);
- fprintf(fp,"\t%d",alignlength_trim); /* field 3: query length */
+ FPRINTF(fp,"\t%d",alignlength_trim); /* field 3: query length */
- fprintf(fp,"\t%d",nmismatches_bothdiff); /* field 4: nmismatches */
+ FPRINTF(fp,"\t%d",nmismatches_bothdiff); /* field 4: nmismatches */
- fprintf(fp,"\t0"); /* field 5: gap openings */
+ FPRINTF(fp,"\t0"); /* field 5: gap openings */
/* fields 6 and 7: query start and end */
- fprintf(fp,"\t%d\t%d",exon_querystart,exon_queryend);
+ FPRINTF(fp,"\t%d\t%d",exon_querystart,exon_queryend);
/* fields 8 and 9: chr start and end */
- fprintf(fp,"\t%u\t%u",exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"\t%u\t%u",exon_genomestart,exon_genomeend);
/* field 10: E value */
- fprintf(fp,"\t%.2g",blast_evalue(alignlength_trim,nmismatches_bothdiff));
+ FPRINTF(fp,"\t%.2g",blast_evalue(alignlength_trim,nmismatches_bothdiff));
/* field 11: bit score */
- fprintf(fp,"\t%.1f",blast_bitscore(alignlength_trim,nmismatches_bothdiff));
+ FPRINTF(fp,"\t%.1f",blast_bitscore(alignlength_trim,nmismatches_bothdiff));
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
void
-Pair_print_m8 (FILE *fp, struct T *pairs_querydir, int npairs, bool invertedp,
+Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool invertedp,
Chrnum_T chrnum, Shortread_T queryseq, Shortread_T headerseq,
char *acc_suffix, Univ_IIT_T chromosome_iit) {
bool in_exon = true;
@@ -4378,22 +4480,36 @@ Pair_gsnap_nsegments (int *total_nmismatches, int *total_nindels, int *nintrons,
/* Derived from print_tokens_gff3 */
static void
-print_tokens_sam (FILE *fp, List_T tokens) {
+print_tokens_sam (Filestring_T fp, List_T tokens) {
List_T p;
char *token;
for (p = tokens; p != NULL; p = List_next(p)) {
token = (char *) List_head(p);
- fprintf(fp,"%s",token);
- FREE(token);
+ FPRINTF(fp,"%s",token);
+ /* FREE(token); -- Now freed within Stage3end_free or Stage3_free */
+ }
+
+ return;
+}
+
+static void
+print_tokens_stdout (List_T tokens) {
+ List_T p;
+ char *token;
+
+ for (p = tokens; p != NULL; p = List_next(p)) {
+ token = (char *) List_head(p);
+ printf("%s",token);
}
return;
}
+
/* Derived from print_tokens_gff3 */
-static int
-tokens_cigarlength (List_T tokens) {
+int
+Pair_tokens_cigarlength (List_T tokens) {
int length = 0, tokenlength;
List_T p;
char *token;
@@ -4447,43 +4563,43 @@ compute_sam_flag_nomate (int pathnum, int npaths, bool first_read_p, bool watson
/* Modeled after Shortread_print_chopped */
static void
-print_chopped (FILE *fp, char *contents, int querylength,
+print_chopped (Filestring_T fp, char *contents, int querylength,
int hardclip_start, int hardclip_end) {
int i;
for (i = hardclip_start; i < querylength - hardclip_end; i++) {
- putc(contents[i],fp);
+ PUTC(contents[i],fp);
}
return;
}
/* Differs from Shortread version, in that hardclip_high and hardclip_low are not reversed */
static void
-print_chopped_revcomp (FILE *fp, char *contents, int querylength,
+print_chopped_revcomp (Filestring_T fp, char *contents, int querylength,
int hardclip_start, int hardclip_end) {
int i;
for (i = querylength - 1 - hardclip_end; i >= hardclip_start; --i) {
- putc(complCode[(int) contents[i]],fp);
+ PUTC(complCode[(int) contents[i]],fp);
}
return;
}
static void
-print_chopped_end (FILE *fp, char *contents, int querylength,
+print_chopped_end (Filestring_T fp, char *contents, int querylength,
int hardclip_start, int hardclip_end) {
int i;
if (hardclip_start > 0) {
for (i = 0; i < hardclip_start; i++) {
- putc(contents[i],fp);
+ PUTC(contents[i],fp);
}
return;
} else {
for (i = querylength - hardclip_end; i < querylength; i++) {
- putc(contents[i],fp);
+ PUTC(contents[i],fp);
}
return;
}
@@ -4491,19 +4607,59 @@ print_chopped_end (FILE *fp, char *contents, int querylength,
/* Differs from Shortread version, in that hardclip_high and hardclip_low are not reversed */
static void
-print_chopped_end_revcomp (FILE *fp, char *contents, int querylength,
+print_chopped_end_revcomp (Filestring_T fp, char *contents, int querylength,
int hardclip_start, int hardclip_end) {
int i;
if (hardclip_start > 0) {
for (i = hardclip_start - 1; i >= 0; --i) {
- putc(complCode[(int) contents[i]],fp);
+ PUTC(complCode[(int) contents[i]],fp);
+ }
+ return;
+
+ } else {
+ for (i = querylength - 1; i >= querylength - hardclip_end; --i) {
+ PUTC(complCode[(int) contents[i]],fp);
+ }
+ return;
+ }
+}
+
+
+static void
+print_chopped_end_quality (Filestring_T fp, char *quality, int querylength,
+ int hardclip_start, int hardclip_end) {
+ int i;
+
+ if (hardclip_start > 0) {
+ for (i = 0; i < hardclip_start; i++) {
+ PUTC(quality[i],fp);
+ }
+ return;
+
+ } else {
+ for (i = querylength - hardclip_end; i < querylength; i++) {
+ PUTC(quality[i],fp);
+ }
+ return;
+ }
+}
+
+/* Differs from Shortread version, in that hardclip_high and hardclip_low are not reversed */
+static void
+print_chopped_end_quality_reverse (Filestring_T fp, char *quality, int querylength,
+ int hardclip_start, int hardclip_end) {
+ int i;
+
+ if (hardclip_start > 0) {
+ for (i = hardclip_start - 1; i >= 0; --i) {
+ PUTC(quality[i],fp);
}
return;
} else {
for (i = querylength - 1; i >= querylength - hardclip_end; --i) {
- putc(complCode[(int) contents[i]],fp);
+ PUTC(quality[i],fp);
}
return;
}
@@ -4513,13 +4669,13 @@ print_chopped_end_revcomp (FILE *fp, char *contents, int querylength,
/* Modeled after Shortread_print_quality */
static void
-print_quality (FILE *fp, char *quality, int querylength,
+print_quality (Filestring_T fp, char *quality, int querylength,
int hardclip_start, int hardclip_end, int shift) {
int i;
int c;
if (quality == NULL) {
- putc('*',fp);
+ PUTC('*',fp);
} else {
for (i = hardclip_start; i < querylength - hardclip_end; i++) {
if ((c = quality[i] + shift) <= 32) {
@@ -4527,7 +4683,7 @@ print_quality (FILE *fp, char *quality, int querylength,
shift,quality[i]);
abort();
} else {
- putc(c,fp);
+ PUTC(c,fp);
}
}
}
@@ -4536,13 +4692,13 @@ print_quality (FILE *fp, char *quality, int querylength,
static void
-print_quality_revcomp (FILE *fp, char *quality, int querylength,
+print_quality_revcomp (Filestring_T fp, char *quality, int querylength,
int hardclip_start, int hardclip_end, int shift) {
int i;
int c;
if (quality == NULL) {
- putc('*',fp);
+ PUTC('*',fp);
} else {
for (i = querylength - 1 - hardclip_end; i >= hardclip_start; --i) {
if ((c = quality[i] + shift) <= 32) {
@@ -4550,7 +4706,7 @@ print_quality_revcomp (FILE *fp, char *quality, int querylength,
shift,quality[i]);
abort();
} else {
- putc(c,fp);
+ PUTC(c,fp);
}
}
}
@@ -4575,8 +4731,8 @@ sensedir_from_cdna_direction (int cdna_direction) {
/* Derived from print_gff3_cdna_match */
/* Assumes pairarray has been hard clipped already */
static void
-print_sam_line (FILE *fp, char *abbrev, bool first_read_p, char *acc1, char *acc2, char *chrstring,
- bool watsonp, int cdna_direction, List_T cigar_tokens, List_T md_tokens,
+print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, char *acc2, char *chrstring,
+ bool watsonp, int sensedir, List_T cigar_tokens, List_T md_tokens,
int nmismatches_refdiff, int nmismatches_bothdiff, int nindels,
bool intronp, char *queryseq_ptr, char *quality_string,
int hardclip_start, int hardclip_end, int querylength, Chimera_T chimera, int quality_shift,
@@ -4585,51 +4741,57 @@ print_sam_line (FILE *fp, char *abbrev, bool first_read_p, char *acc1, char *acc
#ifdef GSNAP
Shortread_T queryseq, Resulttype_T resulttype, int pair_mapq_score, int end_mapq_score,
char *mate_chrstring, Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
- Chrpos_T mate_chrpos, Chrpos_T mate_chrlength, int mate_cdna_direction, int pairedlength,
+ Chrpos_T mate_chrpos, Chrpos_T mate_chrlength, int mate_sensedir, int pairedlength,
#else
int mapq_score, struct T *pairarray, int npairs,
#endif
- char *sam_read_group_id, bool invertp, bool merged_overlap_p) {
- int sensedir;
+ char *sam_read_group_id, bool invertp, bool merged_overlap_p, bool sarrayp) {
+#if 0
+ /* Should already be checked when Stage3_T or Stage3end_T object was created */
if (cigar_action == CIGAR_ACTION_IGNORE) {
/* Don't check */
- } else if (tokens_cigarlength(cigar_tokens) + hardclip_start + hardclip_end == querylength) {
+ } else if (Pair_tokens_cigarlength(cigar_tokens) + hardclip_start + hardclip_end == querylength) {
/* Okay */
} else if (cigar_action == CIGAR_ACTION_WARNING) {
fprintf(stderr,"Warning: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
- acc1,tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+ acc1,Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+ } else if (cigar_action == CIGAR_ACTION_NOPRINT) {
+ fprintf(stderr,"Warning: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
+ acc1,Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+ return;
} else {
/* CIGAR_ACTION_ABORT */
fprintf(stderr,"Error: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
- acc1,tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+ acc1,Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
abort();
}
+#endif
/* 1. QNAME or Accession */
if (acc2 == NULL) {
- fprintf(fp,"%s\t",acc1);
+ FPRINTF(fp,"%s\t",acc1);
} else {
- fprintf(fp,"%s,%s\t",acc1,acc2);
+ FPRINTF(fp,"%s,%s\t",acc1,acc2);
}
/* 2. Flags */
- fprintf(fp,"%u\t",flag);
+ FPRINTF(fp,"%u\t",flag);
/* 3. RNAME or Chrstring */
/* 4. POS or Chrlow */
/* Taken from GMAP part of SAM_chromosomal_pos */
if (chrpos > chrlength) {
- fprintf(fp,"%s\t%u\t",chrstring,chrpos - chrlength /*+ 1U*/);
+ FPRINTF(fp,"%s\t%u\t",chrstring,chrpos - chrlength /*+ 1U*/);
} else {
- fprintf(fp,"%s\t%u\t",chrstring,chrpos /*+ 1U*/);
+ FPRINTF(fp,"%s\t%u\t",chrstring,chrpos /*+ 1U*/);
}
/* 5. MAPQ or Mapping quality */
#ifdef GSNAP
- fprintf(fp,"%d\t",pair_mapq_score);
+ FPRINTF(fp,"%d\t",pair_mapq_score);
#else
- fprintf(fp,"%d\t",mapq_score);
+ FPRINTF(fp,"%d\t",mapq_score);
#endif
/* 6. CIGAR */
@@ -4639,66 +4801,75 @@ print_sam_line (FILE *fp, char *abbrev, bool first_read_p, char *acc1, char *acc
/* 8. MPOS: Mate chrpos */
#ifdef GSNAP
if (mate_chrpos == 0U) {
- fprintf(fp,"\t*\t0");
+ FPRINTF(fp,"\t*\t0");
} else if (mate_chrpos > mate_chrlength) {
- fprintf(fp,"\t%s\t%u",mate_chrstring,mate_chrpos - mate_chrlength /* +1U*/);
+ FPRINTF(fp,"\t%s\t%u",mate_chrstring,mate_chrpos - mate_chrlength /* +1U*/);
} else {
- fprintf(fp,"\t%s\t%u",mate_chrstring,mate_chrpos /* +1U*/);
+ FPRINTF(fp,"\t%s\t%u",mate_chrstring,mate_chrpos /* +1U*/);
}
#else
- fprintf(fp,"\t*\t0");
+ FPRINTF(fp,"\t*\t0");
#endif
/* 9. ISIZE: Insert size */
#ifdef GSNAP
if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
if (watsonp == invertp) {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",-pairedlength);
} else {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
}
} else if (mate_chrpos == 0) {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
} else if (chrpos < mate_chrpos) {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
} else if (chrpos > mate_chrpos) {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",-pairedlength);
} else if (first_read_p == true) {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
} else {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",-pairedlength);
}
#else
- fprintf(fp,"\t0");
+ FPRINTF(fp,"\t0");
#endif
/* 10. SEQ: queryseq and 11. QUAL: quality_scores */
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
if (watsonp == true) {
print_chopped(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
print_quality(fp,quality_string,querylength,hardclip_start,hardclip_end,
quality_shift);
} else {
print_chopped_revcomp(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
print_quality_revcomp(fp,quality_string,querylength,hardclip_start,hardclip_end,
quality_shift);
}
/* 12. TAGS: RG */
if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
+ FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
}
- /* 12. TAGS: XH */
+ /* 12. TAGS: XH and XI */
if (hardclip_start > 0 || hardclip_end > 0) {
- fprintf(fp,"\tXH:Z:");
+ FPRINTF(fp,"\tXH:Z:");
if (watsonp == true) {
print_chopped_end(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
} else {
print_chopped_end_revcomp(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
}
+
+ if (quality_string != NULL) {
+ FPRINTF(fp,"\tXI:Z:");
+ if (watsonp == true) {
+ print_chopped_end_quality(fp,quality_string,querylength,hardclip_start,hardclip_end);
+ } else {
+ print_chopped_end_quality_reverse(fp,quality_string,querylength,hardclip_start,hardclip_end);
+ }
+ }
}
#ifdef GSNAP
@@ -4712,90 +4883,93 @@ print_sam_line (FILE *fp, char *abbrev, bool first_read_p, char *acc1, char *acc
#endif
/* 12. TAGS: MD string */
- fprintf(fp,"\tMD:Z:");
+ FPRINTF(fp,"\tMD:Z:");
print_tokens_sam(fp,md_tokens);
/* 12. TAGS: NH */
- fprintf(fp,"\tNH:i:%d",npaths);
+ FPRINTF(fp,"\tNH:i:%d",npaths);
/* 12. TAGS: HI */
- fprintf(fp,"\tHI:i:%d",pathnum);
+ FPRINTF(fp,"\tHI:i:%d",pathnum);
/* 12. TAGS: NM */
- fprintf(fp,"\tNM:i:%d",nmismatches_refdiff + nindels);
+ FPRINTF(fp,"\tNM:i:%d",nmismatches_refdiff + nindels);
if (snps_p) {
/* 12. TAGS: XW and XV */
- fprintf(fp,"\tXW:i:%d",nmismatches_bothdiff);
- fprintf(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
+ FPRINTF(fp,"\tXW:i:%d",nmismatches_bothdiff);
+ FPRINTF(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
}
/* 12. TAGS: SM */
#ifdef GSNAP
- fprintf(fp,"\tSM:i:%d",end_mapq_score);
+ FPRINTF(fp,"\tSM:i:%d",end_mapq_score);
#else
- fprintf(fp,"\tSM:i:%d",40);
+ FPRINTF(fp,"\tSM:i:%d",40);
#endif
/* 12. TAGS: XQ */
- fprintf(fp,"\tXQ:i:%d",absmq_score);
+ FPRINTF(fp,"\tXQ:i:%d",absmq_score);
/* 12. TAGS: X2 */
- fprintf(fp,"\tX2:i:%d",second_absmq);
+ FPRINTF(fp,"\tX2:i:%d",second_absmq);
/* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
+ FPRINTF(fp,"\tXO:Z:%s",abbrev);
/* 12. TAGS: XS */
- if (intronp == true) {
#ifdef GSNAP
- if ((sensedir = sensedir_from_cdna_direction(cdna_direction)) == SENSE_NULL) {
- sensedir = sensedir_from_cdna_direction(mate_cdna_direction);
- }
-#else
- sensedir = sensedir_from_cdna_direction(cdna_direction);
+ if (sensedir == SENSE_NULL) {
+ sensedir = mate_sensedir;
+ }
#endif
- if (sensedir == SENSE_FORWARD) {
- if (watsonp == true) {
- fprintf(fp,"\tXS:A:+");
- } else {
- fprintf(fp,"\tXS:A:-");
- }
-
- } else if (sensedir == SENSE_ANTI) {
- if (watsonp == true) {
- fprintf(fp,"\tXS:A:-");
- } else {
- fprintf(fp,"\tXS:A:+");
- }
-
- } else if (force_xs_direction_p == true) {
- /* Could not determine sense, so just report arbitrarily as + */
- /* This option provided for users of Cufflinks, which cannot handle XS:A:? */
- fprintf(fp,"\tXS:A:+");
+ if (sensedir == SENSE_FORWARD) {
+ if (watsonp == true) {
+ FPRINTF(fp,"\tXS:A:+");
+ } else {
+ FPRINTF(fp,"\tXS:A:-");
+ }
+ } else if (sensedir == SENSE_ANTI) {
+ if (watsonp == true) {
+ FPRINTF(fp,"\tXS:A:-");
} else {
- /* Non-canonical, so report as such */
- fprintf(fp,"\tXS:A:?");
+ FPRINTF(fp,"\tXS:A:+");
}
+
+ } else if (intronp == false) {
+ /* Skip. No intron in this end and mate is not revealing. */
+
+ } else if (force_xs_direction_p == true) {
+ /* Could not determine sense, so just report arbitrarily as + */
+ /* This option provided for users of Cufflinks, which cannot handle XS:A:? */
+ FPRINTF(fp,"\tXS:A:+");
+
+#if 0
+ } else {
+ /* Non-canonical. Don't report. */
+ FPRINTF(fp,"\tXS:A:?");
+#endif
}
/* 12. TAGS: XT */
if (chimera != NULL) {
- fprintf(fp,"\tXT:Z:");
+ FPRINTF(fp,"\tXT:Z:");
Chimera_print_sam_tag(fp,chimera,chromosome_iit);
}
/* 12. TAGS: XG */
if (merged_overlap_p) {
- fprintf(fp,"\tXG:Z:O");
+ FPRINTF(fp,"\tXG:Z:O");
+ } else if (sarrayp == true) {
+ FPRINTF(fp,"\tXG:Z:B");
} else {
- fprintf(fp,"\tXG:Z:M");
+ FPRINTF(fp,"\tXG:Z:M");
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
@@ -4860,6 +5034,7 @@ hardclip_pairs (int *clipped_npairs, int hardclip_start, int hardclip_end,
if (i >= npairs) {
/* hardclip_start passes right end of read, so invalid */
+ debug10(printf("i = %d, so passed end of read\n",i));
hardclip_start = 0;
} else if (hardclip_start > 0) {
hardclip_start = ptr->querypos;
@@ -4884,6 +5059,7 @@ hardclip_pairs (int *clipped_npairs, int hardclip_start, int hardclip_end,
if (i < 0) {
/* hardclip_end passes left end of read, so invalid */
+ debug10(printf("i = %d, so passed left end of read\n",i));
hardclip_end = 0;
} else if (hardclip_end > 0) {
hardclip_end = querylength - 1 - ptr->querypos;
@@ -4918,7 +5094,7 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
type = curr_token[strlen(curr_token)-1];
if (type == last_type) {
length += atoi(last_token);
- FREE(last_token);
+ FREE_OUT(last_token);
duplicatep = true;
} else {
if (last_type == ' ') {
@@ -4943,7 +5119,7 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
unique = List_push(unique,(void *) last_token);
} else {
length += atoi(last_token);
- FREE(last_token);
+ FREE_OUT(last_token);
sprintf(token,"%d%c",length,last_type);
unique = push_token(unique,token);
}
@@ -4989,9 +5165,9 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
}
-static List_T
-compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
- bool watsonp, int cdna_direction, int chimera_part) {
+List_T
+Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
+ bool watsonp, int sensedir, int chimera_part) {
List_T tokens = NULL;
char token[10];
int Mlength = 0, Ilength = 0, Dlength = 0;
@@ -5097,7 +5273,7 @@ compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *
deletionp = false;
#ifdef CONVERT_INTRONS_TO_DELETIONS
- if (cdna_direction > 0) {
+ if (sensedir == SENSE_FORWARD) {
if (prev->comp == FWD_CANONICAL_INTRON_COMP ||
prev->comp == FWD_GCAG_INTRON_COMP ||
prev->comp == FWD_ATAC_INTRON_COMP) {
@@ -5110,7 +5286,7 @@ compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *
sprintf(token,"%uD",genome_gap);
deletionp = true;
}
- } else if (cdna_direction < 0) {
+ } else if (sensedir == SENSE_ANTI) {
if (prev->comp == REV_CANONICAL_INTRON_COMP ||
prev->comp == REV_GCAG_INTRON_COMP ||
prev->comp == REV_ATAC_INTRON_COMP) {
@@ -5137,7 +5313,7 @@ compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *
tokens = push_token(tokens,token);
/* Check for dual gap. Doesn't work for hard clipping. */
- assert(exon_queryend >= 0);
+ /* assert(exon_queryend >= 0); */
query_gap = this->querypos - exon_queryend;
assert(query_gap >= 0);
@@ -5842,7 +6018,7 @@ compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nin
} else {
fprintf(stderr,"Unexpected comp '%c'\n",this->comp);
- exit(9);
+ abort();
}
}
@@ -6018,11 +6194,11 @@ compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nin
void
-Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
+Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairarray, int npairs, List_T cigar_tokens, bool intronp,
char *acc1, char *acc2, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
char *queryseq_ptr, char *quality_string,
int clipdir, int hardclip_low, int hardclip_high, int querylength_given,
- bool watsonp, int cdna_direction, int chimera_part, Chimera_T chimera,
+ bool watsonp, int sensedir, int chimera_part, Chimera_T chimera,
int quality_shift, bool first_read_p, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, Chrpos_T chrpos, Chrpos_T chrlength,
#ifdef GSNAP
@@ -6030,11 +6206,11 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
int pair_mapq_score, int end_mapq_score,
Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
Chrpos_T mate_chrpos, Chrpos_T mate_chrlength,
- int mate_cdna_direction, int pairedlength,
+ int mate_sensedir, int pairedlength,
#else
int mapq_score, bool sam_paired_p,
#endif
- char *sam_read_group_id, bool invertp, bool circularp, bool merged_overlap_p) {
+ char *sam_read_group_id, bool invertp, bool circularp, bool merged_overlap_p, bool sarrayp) {
char *chrstring = NULL;
#ifdef GSNAP
char *mate_chrstring, *mate_chrstring_alloc = NULL;
@@ -6042,13 +6218,14 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
unsigned int flag;
#endif
- List_T cigar_tokens = NULL, md_tokens = NULL;
+ List_T md_tokens = NULL;
int nmismatches_refdiff, nmismatches_bothdiff, nindels;
- bool intronp, ignore_intronp;
+ bool ignore_intronp;
int hardclip_start, hardclip_end;
int hardclip_start_zero = 0, hardclip_end_zero = 0;
- struct T *clipped_pairs;
+ struct T *clipped_pairarray;
int clipped_npairs;
+ bool cigar_tokens_alloc;
if (chrnum == 0) {
@@ -6077,8 +6254,8 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
flag = compute_sam_flag_nomate(pathnum,npaths,first_read_p,watsonp,sam_paired_p);
#endif
- debug4(printf("Entered Pair_print_sam with clipdir %d, watsonp %d, first_read_p %d, hardclip5 %d, and hardclip3 %d\n",
- clipdir,watsonp,first_read_p,hardclip5,hardclip3));
+ debug4(printf("Entered Pair_print_sam with clipdir %d, watsonp %d, first_read_p %d, hardclip_low %d, and hardclip_high %d\n",
+ clipdir,watsonp,first_read_p,hardclip_low,hardclip_high));
if (watsonp == true) {
hardclip_start = hardclip_low;
@@ -6090,27 +6267,36 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
debug4(printf("hardclip_start %d, hardclip_end %d\n",hardclip_start,hardclip_end));
- /* Get CIGAR and intronp for entire read */
- cigar_tokens = compute_cigar(&intronp,&hardclip_start_zero,&hardclip_end_zero,pairs,npairs,querylength_given,
- watsonp,cdna_direction,chimera_part);
- if (hardclip_start == 0 && hardclip_end == 0) {
- clipped_pairs = pairs;
+ if (merged_overlap_p == true) {
+ /* clipped_pairarray = pairarray; */
+ /* clipped_npairs = npairs; */
+ clipped_pairarray = hardclip_pairs(&clipped_npairs,hardclip_start,hardclip_end,
+ pairarray,npairs,querylength_given);
+ cigar_tokens = Pair_compute_cigar(&intronp,&hardclip_start,&hardclip_end,clipped_pairarray,clipped_npairs,querylength_given,
+ watsonp,sensedir,chimera_part);
+ cigar_tokens_alloc = true;
+
+#if 0
+ } else if (hardclip_start == 0 && hardclip_end == 0) {
+ /* Fails for both GSNAP clip-overlap and GMAP chimera */
+ clipped_pairarray = pairarray;
clipped_npairs = npairs;
+#endif
+
} else {
- clipped_pairs = hardclip_pairs(&clipped_npairs,hardclip_start,hardclip_end,
- pairs,npairs,querylength_given);
+ clipped_pairarray = hardclip_pairs(&clipped_npairs,hardclip_start,hardclip_end,
+ pairarray,npairs,querylength_given);
+ cigar_tokens = Pair_compute_cigar(&ignore_intronp,&hardclip_start,&hardclip_end,clipped_pairarray,clipped_npairs,querylength_given,
+ watsonp,sensedir,chimera_part);
+ cigar_tokens_alloc = true;
}
- tokens_free(&cigar_tokens);
/* Cigar updates hardclip5 and hardclip3 for chimeras */
- cigar_tokens = compute_cigar(&ignore_intronp,&hardclip_start,&hardclip_end,clipped_pairs,clipped_npairs,querylength_given,
- watsonp,cdna_direction,chimera_part);
-
md_tokens = compute_md_string(&nmismatches_refdiff,&nmismatches_bothdiff,&nindels,
- clipped_pairs,clipped_npairs,watsonp,cigar_tokens);
+ clipped_pairarray,clipped_npairs,watsonp,cigar_tokens);
print_sam_line(fp,abbrev,first_read_p,acc1,acc2,chrstring,
- watsonp,cdna_direction,cigar_tokens,md_tokens,
+ watsonp,sensedir,cigar_tokens,md_tokens,
nmismatches_refdiff,nmismatches_bothdiff,nindels,
intronp,queryseq_ptr,quality_string,hardclip_start,hardclip_end,
querylength_given,chimera,quality_shift,pathnum,npaths,
@@ -6119,15 +6305,17 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
#ifdef GSNAP
queryseq,resulttype,pair_mapq_score,end_mapq_score,mate_chrstring,
mate_chrnum,mate_effective_chrnum,mate_chrpos,mate_chrlength,
- mate_cdna_direction,pairedlength,
+ mate_sensedir,pairedlength,
#else
- mapq_score,clipped_pairs,clipped_npairs,
+ mapq_score,clipped_pairarray,clipped_npairs,
#endif
- sam_read_group_id,invertp,merged_overlap_p);
+ sam_read_group_id,invertp,merged_overlap_p,sarrayp);
/* Print procedures free the character strings */
- List_free(&md_tokens);
- List_free(&cigar_tokens);
+ Pair_tokens_free(&md_tokens);
+ if (cigar_tokens_alloc == true) {
+ Pair_tokens_free(&cigar_tokens);
+ }
#ifdef GSNAP
if (mate_chrstring_alloc != NULL) {
@@ -6143,7 +6331,7 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
void
-Pair_print_sam_nomapping (FILE *fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
+Pair_print_sam_nomapping (Filestring_T fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
char *quality_string, int querylength, int quality_shift,
bool first_read_p, bool sam_paired_p, char *sam_read_group_id) {
unsigned int flag;
@@ -6155,48 +6343,48 @@ Pair_print_sam_nomapping (FILE *fp, char *abbrev, char *acc1, char *acc2, char *
/* 1. QNAME */
if (acc2 == NULL) {
- fprintf(fp,"%s",acc1);
+ FPRINTF(fp,"%s",acc1);
} else {
- fprintf(fp,"%s,%s",acc1,acc2);
+ FPRINTF(fp,"%s,%s",acc1,acc2);
}
/* 2. FLAG */
flag = compute_sam_flag_nomate(/*pathnum*/0,/*npaths*/0,first_read_p,/*watsonp*/true,sam_paired_p);
- fprintf(fp,"\t%u",flag);
+ FPRINTF(fp,"\t%u",flag);
/* 3. RNAME: chr */
- fprintf(fp,"\t*");
+ FPRINTF(fp,"\t*");
/* 4. POS: chrpos */
- fprintf(fp,"\t0");
+ FPRINTF(fp,"\t0");
/* 5. MAPQ: Mapping quality */
/* Picard says MAPQ should be 0 for an unmapped read */
- fprintf(fp,"\t0");
+ FPRINTF(fp,"\t0");
/* 6. CIGAR */
- fprintf(fp,"\t*");
+ FPRINTF(fp,"\t*");
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
/* 9. ISIZE: Insert size */
- fprintf(fp,"\t*\t0\t0\t");
+ FPRINTF(fp,"\t*\t0\t0\t");
/* 10. SEQ: queryseq and 11. QUAL: quality scores */
print_chopped(fp,queryseq_ptr,querylength,/*hardclip_start*/0,/*hardclip_end*/0);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
print_quality(fp,quality_string,querylength,/*hardclip_start*/0,/*hardclip_end*/0,
quality_shift);
/* 12. TAGS: RG */
if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
+ FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
}
/* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
+ FPRINTF(fp,"\tXO:Z:%s",abbrev);
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
@@ -6266,7 +6454,7 @@ count_psl_blocks_nt (Intlist_T *blockSizes, Intlist_T *qStarts, Uintlist_T *tSta
if (in_block == true) {
nblocks++;
block_queryend = last_querypos;
- debug2(fprintf(fp,"Block size: %d\n",abs(block_queryend-block_querystart)+1));
+ debug2(FPRINTF(fp,"Block size: %d\n",abs(block_queryend-block_querystart)+1));
*blockSizes = Intlist_push(*blockSizes,abs(block_queryend-block_querystart)+1);
in_block = false;
}
@@ -6277,7 +6465,7 @@ count_psl_blocks_nt (Intlist_T *blockSizes, Intlist_T *qStarts, Uintlist_T *tSta
if (in_block == true) {
nblocks++;
block_queryend = last_querypos;
- debug2(fprintf(fp,"Block size: %d\n",abs(block_queryend-block_querystart)+1));
+ debug2(FPRINTF(fp,"Block size: %d\n",abs(block_queryend-block_querystart)+1));
*blockSizes = Intlist_push(*blockSizes,abs(block_queryend-block_querystart)+1);
in_block = false;
}
@@ -6288,10 +6476,10 @@ count_psl_blocks_nt (Intlist_T *blockSizes, Intlist_T *qStarts, Uintlist_T *tSta
if (in_block == false) {
block_querystart = this->querypos;
if (watsonp == true) {
- debug2(fprintf(fp,"Pushing qstart: %d\n",block_querystart));
+ debug2(FPRINTF(fp,"Pushing qstart: %d\n",block_querystart));
*qStarts = Intlist_push(*qStarts,block_querystart);
} else {
- debug2(fprintf(fp,"Pushing qstart: %d\n",querylength-block_querystart-1));
+ debug2(FPRINTF(fp,"Pushing qstart: %d\n",querylength-block_querystart-1));
*qStarts = Intlist_push(*qStarts,querylength-block_querystart-1);
}
*tStarts = Uintlist_push(*tStarts,this->genomepos);
@@ -6313,7 +6501,7 @@ count_psl_blocks_nt (Intlist_T *blockSizes, Intlist_T *qStarts, Uintlist_T *tSta
/* prev = this; */
nblocks++;
block_queryend = last_querypos;
- debug2(fprintf(fp,"Block size: %d\n",abs(block_queryend-block_querystart)+1));
+ debug2(FPRINTF(fp,"Block size: %d\n",abs(block_queryend-block_querystart)+1));
*blockSizes = Intlist_push(*blockSizes,abs(block_queryend-block_querystart)+1);
}
@@ -6410,7 +6598,7 @@ compute_gap_lengths_int (int *nbreaks, int *length, Intlist_T blockSizes, Intlis
int start, end;
/* Intlist_T p = blockSizes, q = Starts; */
- debug2(fprintf(fp,"Entered compute_gap_lengths_int with nblocks = %d, and Starts having length %d\n",
+ debug2(FPRINTF(fp,"Entered compute_gap_lengths_int with nblocks = %d, and Starts having length %d\n",
nblocks,Intlist_length(Starts)));
*nbreaks = *length = 0;
for (i = 0; i < nblocks - 1; i++) {
@@ -6420,7 +6608,7 @@ compute_gap_lengths_int (int *nbreaks, int *length, Intlist_T blockSizes, Intlis
*nbreaks += 1;
*length += (start - end);
}
- debug2(fprintf(fp,"%d - %d = %d, gap = %d\n",start,end,start-end,*length));
+ debug2(FPRINTF(fp,"%d - %d = %d, gap = %d\n",start,end,start-end,*length));
}
end = Intlist_head(Starts) + Intlist_head(blockSizes);
blockSizes = Intlist_next(blockSizes);
@@ -6433,7 +6621,7 @@ compute_gap_lengths_int (int *nbreaks, int *length, Intlist_T blockSizes, Intlis
*nbreaks += 1;
*length += (start - end);
}
- debug2(fprintf(fp,"%d - %d = %d, gap = %d\n",start,end,start-end,*length));
+ debug2(FPRINTF(fp,"%d - %d = %d, gap = %d\n",start,end,start-end,*length));
}
return;
@@ -6456,7 +6644,7 @@ compute_gap_lengths_uint (int *nbreaks, int *length, Intlist_T blockSizes, Uintl
*nbreaks += 1;
*length += (start - end);
}
- debug2(fprintf(fp,"%d - %d = %d, gap = %d\n",start,end,start-end,*length));
+ debug2(FPRINTF(fp,"%d - %d = %d, gap = %d\n",start,end,start-end,*length));
}
end = Uintlist_head(Starts) + Intlist_head(blockSizes);
blockSizes = Intlist_next(blockSizes);
@@ -6469,7 +6657,7 @@ compute_gap_lengths_uint (int *nbreaks, int *length, Intlist_T blockSizes, Uintl
*nbreaks += 1;
*length += (start - end);
}
- debug2(fprintf(fp,"%d - %d = %d, gap = %d\n",start,end,start-end,*length));
+ debug2(FPRINTF(fp,"%d - %d = %d, gap = %d\n",start,end,start-end,*length));
}
return;
@@ -6506,7 +6694,7 @@ count_matches_pro (int *matches, int *mismatches, int *unknowns,
void
-Pair_print_pslformat_nt (FILE *fp, struct T *pairs, int npairs, T start, T end,
+Pair_print_pslformat_nt (Filestring_T fp, struct T *pairs, int npairs, T start, T end,
Sequence_T queryseq, Chrnum_T chrnum,
Univ_IIT_T chromosome_iit, Sequence_T usersegment,
int matches, int unknowns, int mismatches,
@@ -6536,24 +6724,24 @@ Pair_print_pslformat_nt (FILE *fp, struct T *pairs, int npairs, T start, T end,
compute_gap_lengths_int(&qnbreaks,&qlength,blockSizes,qStarts,nblocks);
compute_gap_lengths_uint(&tnbreaks,&tlength,blockSizes,tStarts,nblocks);
- fprintf(fp,"%d\t%d\t%d\t%d\t",matches,mismatches,/*repeatmatches*/0,unknowns);
- fprintf(fp,"%d\t%d\t%d\t%d\t",qnbreaks,qlength,tnbreaks,tlength);
+ FPRINTF(fp,"%d\t%d\t%d\t%d\t",matches,mismatches,/*repeatmatches*/0,unknowns);
+ FPRINTF(fp,"%d\t%d\t%d\t%d\t",qnbreaks,qlength,tnbreaks,tlength);
if (watsonp == true) {
- fprintf(fp,"+");
+ FPRINTF(fp,"+");
} else {
- fprintf(fp,"-");
+ FPRINTF(fp,"-");
}
- fprintf(fp,"\t%s\t%d",Sequence_accession(queryseq),Sequence_fulllength_given(queryseq));
+ FPRINTF(fp,"\t%s\t%d",Sequence_accession(queryseq),Sequence_fulllength_given(queryseq));
- fprintf(fp,"\t%d\t%d",start->querypos,end->querypos+1);
+ FPRINTF(fp,"\t%d\t%d",start->querypos,end->querypos+1);
/* T name and T size */
if (chrnum == 0) {
- fprintf(fp,"\t%s\t%u",Sequence_accession(usersegment),Sequence_fulllength(usersegment));
+ FPRINTF(fp,"\t%s\t%u",Sequence_accession(usersegment),Sequence_fulllength(usersegment));
} else {
chr = Chrnum_to_string(chrnum,chromosome_iit);
- fprintf(fp,"\t%s\t%u",chr,Chrnum_length(chrnum,chromosome_iit));
+ FPRINTF(fp,"\t%s\t%u",chr,Chrnum_length(chrnum,chromosome_iit));
FREE(chr);
}
@@ -6561,26 +6749,26 @@ Pair_print_pslformat_nt (FILE *fp, struct T *pairs, int npairs, T start, T end,
chrpos1 = start->genomepos;
chrpos2 = end->genomepos;
if (watsonp) {
- fprintf(fp,"\t%u\t%u",chrpos1,chrpos2+1U);
+ FPRINTF(fp,"\t%u\t%u",chrpos1,chrpos2+1U);
} else {
- fprintf(fp,"\t%u\t%u",chrpos2,chrpos1+1U);
+ FPRINTF(fp,"\t%u\t%u",chrpos2,chrpos1+1U);
}
- fprintf(fp,"\t%d",nblocks);
+ FPRINTF(fp,"\t%d",nblocks);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
for (p = blockSizes; p != NULL; p = Intlist_next(p)) {
- fprintf(fp,"%d,",Intlist_head(p));
+ FPRINTF(fp,"%d,",Intlist_head(p));
}
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
for (p = qStarts; p != NULL; p = Intlist_next(p)) {
- fprintf(fp,"%d,",Intlist_head(p));
+ FPRINTF(fp,"%d,",Intlist_head(p));
}
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
for (q = tStarts; q != NULL; q = Uintlist_next(q)) {
- fprintf(fp,"%u,",Uintlist_head(q));
+ FPRINTF(fp,"%u,",Uintlist_head(q));
}
Intlist_free(&blockSizes);
@@ -6591,12 +6779,12 @@ Pair_print_pslformat_nt (FILE *fp, struct T *pairs, int npairs, T start, T end,
FREE(pairs_directional);
}
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
void
-Pair_print_pslformat_pro (FILE *fp, struct T *pairs, int npairs, T start, T end,
+Pair_print_pslformat_pro (Filestring_T fp, struct T *pairs, int npairs, T start, T end,
Sequence_T queryseq, Chrnum_T chrnum,
Univ_IIT_T chromosome_iit, Sequence_T usersegment,
bool watsonp, int cdna_direction) {
@@ -6616,30 +6804,30 @@ Pair_print_pslformat_pro (FILE *fp, struct T *pairs, int npairs, T start, T end,
count_matches_pro(&matches,&mismatches,&unknowns,pairs,npairs);
- fprintf(fp,"%d\t%d\t%d\t%d\t",matches,mismatches,/*repeatmatches*/0,unknowns);
- fprintf(fp,"%d\t%d\t%d\t%d\t",qnbreaks,qlength,tnbreaks,tlength);
+ FPRINTF(fp,"%d\t%d\t%d\t%d\t",matches,mismatches,/*repeatmatches*/0,unknowns);
+ FPRINTF(fp,"%d\t%d\t%d\t%d\t",qnbreaks,qlength,tnbreaks,tlength);
if (cdna_direction >= 0) {
- fprintf(fp,"+");
+ FPRINTF(fp,"+");
} else {
- fprintf(fp,"-");
+ FPRINTF(fp,"-");
}
if (watsonp == true) {
- fprintf(fp,"+");
+ FPRINTF(fp,"+");
} else {
- fprintf(fp,"-");
+ FPRINTF(fp,"-");
}
- fprintf(fp,"\t%s\t%d",Sequence_accession(queryseq),Sequence_fulllength_given(queryseq));
+ FPRINTF(fp,"\t%s\t%d",Sequence_accession(queryseq),Sequence_fulllength_given(queryseq));
- fprintf(fp,"\t%d\t%d",(start->querypos+2)/3,end->querypos/3+1);
+ FPRINTF(fp,"\t%d\t%d",(start->querypos+2)/3,end->querypos/3+1);
/* T name and T size */
if (chrnum == 0) {
- fprintf(fp,"\t%s\t%u",Sequence_accession(usersegment),Sequence_fulllength(usersegment));
+ FPRINTF(fp,"\t%s\t%u",Sequence_accession(usersegment),Sequence_fulllength(usersegment));
} else {
chr = Chrnum_to_string(chrnum,chromosome_iit);
- fprintf(fp,"\tchr%s\t%u",chr,Chrnum_length(chrnum,chromosome_iit));
+ FPRINTF(fp,"\tchr%s\t%u",chr,Chrnum_length(chrnum,chromosome_iit));
FREE(chr);
}
@@ -6647,41 +6835,41 @@ Pair_print_pslformat_pro (FILE *fp, struct T *pairs, int npairs, T start, T end,
chrpos1 = start->genomepos;
chrpos2 = end->genomepos;
if (watsonp) {
- fprintf(fp,"\t%u\t%u",chrpos1,chrpos2+1U);
+ FPRINTF(fp,"\t%u\t%u",chrpos1,chrpos2+1U);
} else {
- fprintf(fp,"\t%u\t%u",chrpos2,chrpos1+1U);
+ FPRINTF(fp,"\t%u\t%u",chrpos2,chrpos1+1U);
}
nblocks = count_psl_blocks_pro(&blockSizes,&qStarts,&tStarts,pairs,npairs,
watsonp,chrlength);
- fprintf(fp,"\t%d",nblocks);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t%d",nblocks);
+ FPRINTF(fp,"\t");
for (p = blockSizes; p != NULL; p = Intlist_next(p)) {
- fprintf(fp,"%d,",Intlist_head(p));
+ FPRINTF(fp,"%d,",Intlist_head(p));
}
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
for (p = qStarts; p != NULL; p = Intlist_next(p)) {
- fprintf(fp,"%d,",Intlist_head(p));
+ FPRINTF(fp,"%d,",Intlist_head(p));
}
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
for (q = tStarts; q != NULL; q = Uintlist_next(q)) {
- fprintf(fp,"%u,",Uintlist_head(q));
+ FPRINTF(fp,"%u,",Uintlist_head(q));
}
Intlist_free(&blockSizes);
Intlist_free(&qStarts);
Uintlist_free(&tStarts);
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
void
-Pair_print_exons (FILE *fp, struct T *pairs, int npairs, int wraplength, int ngap, bool cdnap) {
+Pair_print_exons (Filestring_T fp, struct T *pairs, int npairs, int wraplength, int ngap, bool cdnap) {
bool in_exon = false;
struct T *ptr, *this = NULL;
int i, exonno = 0, column = 0;
@@ -6693,21 +6881,21 @@ Pair_print_exons (FILE *fp, struct T *pairs, int npairs, int wraplength, int nga
if (this->gapp) {
if (in_exon == true) {
if (column != 0) {
- putc('\n',fp);
+ PUTC('\n',fp);
column = 0;
}
- fprintf(fp,"</exon>\n");
+ FPRINTF(fp,"</exon>\n");
in_exon = false;
if (ngap > 0) {
- fprintf(fp,"<intron %d>\n",exonno);
- putc(this->genome,fp);
+ FPRINTF(fp,"<intron %d>\n",exonno);
+ PUTC(this->genome,fp);
column = 1;
}
} else {
if (ngap > 0) {
- putc(this->genome,fp);
+ PUTC(this->genome,fp);
if (++column % wraplength == 0) {
- putc('\n',fp);
+ PUTC('\n',fp);
column = 0;
}
}
@@ -6721,38 +6909,38 @@ Pair_print_exons (FILE *fp, struct T *pairs, int npairs, int wraplength, int nga
if (ngap > 0) {
if (exonno > 0) {
if (column != 0) {
- putc('\n',fp);
+ PUTC('\n',fp);
column = 0;
}
- fprintf(fp,"</intron>\n");
+ FPRINTF(fp,"</intron>\n");
}
}
- fprintf(fp,"<exon %d",++exonno);
+ FPRINTF(fp,"<exon %d",++exonno);
if (cdnap == true) {
if (this->aaphase_e >= 0) {
- fprintf(fp,", phase %d",this->aaphase_e);
+ FPRINTF(fp,", phase %d",this->aaphase_e);
}
} else {
if (this->aaphase_g >= 0) {
- fprintf(fp,", phase %d",this->aaphase_g);
+ FPRINTF(fp,", phase %d",this->aaphase_g);
}
}
- fprintf(fp,">\n");
+ FPRINTF(fp,">\n");
in_exon = true;
}
if (cdnap == true) {
if (this->cdna != ' ') {
- putc(this->cdna,fp);
+ PUTC(this->cdna,fp);
if (++column % wraplength == 0) {
- putc('\n',fp);
+ PUTC('\n',fp);
column = 0;
}
}
} else {
if (this->genome != ' ') {
- putc(this->genome,fp);
+ PUTC(this->genome,fp);
if (++column % wraplength == 0) {
- putc('\n',fp);
+ PUTC('\n',fp);
column = 0;
}
}
@@ -6760,9 +6948,9 @@ Pair_print_exons (FILE *fp, struct T *pairs, int npairs, int wraplength, int nga
}
}
if (column != 0) {
- putc('\n',fp);
+ PUTC('\n',fp);
}
- fprintf(fp,"</exon>\n");
+ FPRINTF(fp,"</exon>\n");
return;
}
@@ -7171,7 +7359,7 @@ Pair_fracidentity_changepoint (List_T pairs, int cdna_direction) {
for (p = pairs; p != NULL; p = p->rest) {
i++;
this = p->first;
- debug3(fprintf(fp,"%d: ",i));
+ debug3(FPRINTF(fp,"%d: ",i));
debug3(Pair_dump_one(this,/*zerobasedp*/false));
if (this->gapp) {
if (!in_intron) {
@@ -7231,7 +7419,7 @@ Pair_fracidentity_changepoint (List_T pairs, int cdna_direction) {
if (score > maxscore) {
maxscore = score;
changepoint = i;
- debug3(fprintf(fp," => maxscore %d",maxscore));
+ debug3(FPRINTF(fp," => maxscore %d",maxscore));
}
} else if (this->comp == MISMATCH_COMP) {
score += MISMATCH;
@@ -7240,7 +7428,7 @@ Pair_fracidentity_changepoint (List_T pairs, int cdna_direction) {
abort();
}
}
- debug3(fprintf(fp,"\n"));
+ debug3(FPRINTF(fp,"\n"));
prev = this;
}
@@ -7261,7 +7449,7 @@ Pair_fracidentity_score (List_T pairs, int cdna_direction) {
for (p = pairs; p != NULL; p = p->rest) {
i++;
this = p->first;
- debug3(fprintf(fp,"%d: ",i));
+ debug3(FPRINTF(fp,"%d: ",i));
debug3(Pair_dump_one(this,/*zerobasedp*/false));
if (this->gapp) {
if (!in_intron) {
@@ -7300,7 +7488,7 @@ Pair_fracidentity_score (List_T pairs, int cdna_direction) {
abort();
}
}
- debug3(fprintf(fp,"\n"));
+ debug3(FPRINTF(fp,"\n"));
prev = this;
}
@@ -7757,7 +7945,7 @@ invert_intron (char *donor, char *acceptor) {
void
-Pair_print_protein_genomic (FILE *fp, struct T *ptr, int npairs, int wraplength, bool forwardp) {
+Pair_print_protein_genomic (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp) {
struct T *this;
int xpos = 0, i;
@@ -7766,43 +7954,43 @@ Pair_print_protein_genomic (FILE *fp, struct T *ptr, int npairs, int wraplength,
this = ptr++;
if (this->aa_g != ' ') {
if (xpos == wraplength) {
- putc('\n',fp);
+ PUTC('\n',fp);
xpos = 0;
}
#ifdef PMAP
- putc(this->aa_g,fp);
+ PUTC(this->aa_g,fp);
xpos++;
#else
if (this->aa_g != '*') {
- putc(this->aa_g,fp);
+ PUTC(this->aa_g,fp);
xpos++;
}
#endif
}
}
- putc('\n',fp);
+ PUTC('\n',fp);
} else {
for (i = npairs-1; i >= 0; i--) {
this = ptr--;
if (this->aa_g != ' ') {
if (xpos == wraplength) {
- putc('\n',fp);
+ PUTC('\n',fp);
xpos = 0;
}
#ifdef PMAP
abort();
- putc(this->aa_g,fp);
+ PUTC(this->aa_g,fp);
xpos++;
#else
if (this->aa_g != '*') {
- putc(this->aa_g,fp);
+ PUTC(this->aa_g,fp);
xpos++;
}
#endif
}
}
- putc('\n',fp);
+ PUTC('\n',fp);
}
@@ -7811,7 +7999,7 @@ Pair_print_protein_genomic (FILE *fp, struct T *ptr, int npairs, int wraplength,
#ifdef PMAP
void
-Pair_print_nucleotide_cdna (FILE *fp, struct T *ptr, int npairs, int wraplength) {
+Pair_print_nucleotide_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength) {
struct T *this;
int xpos = 0, i;
@@ -7819,19 +8007,19 @@ Pair_print_nucleotide_cdna (FILE *fp, struct T *ptr, int npairs, int wraplength)
this = ptr++;
if (this->cdna != ' ') {
if (xpos == wraplength) {
- putc('\n',fp);
+ PUTC('\n',fp);
xpos = 0;
}
- putc(this->cdna,fp);
+ PUTC(this->cdna,fp);
xpos++;
}
}
- putc('\n',fp);
+ PUTC('\n',fp);
return;
}
#else
void
-Pair_print_protein_cdna (FILE *fp, struct T *ptr, int npairs, int wraplength, bool forwardp) {
+Pair_print_protein_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp) {
struct T *this;
int xpos = 0, i;
@@ -7840,32 +8028,32 @@ Pair_print_protein_cdna (FILE *fp, struct T *ptr, int npairs, int wraplength, bo
this = ptr++;
if (this->aa_e != ' ') {
if (xpos == wraplength) {
- putc('\n',fp);
+ PUTC('\n',fp);
xpos = 0;
}
if (this->aa_e != '*') {
- putc(this->aa_e,fp);
+ PUTC(this->aa_e,fp);
xpos++;
}
}
}
- putc('\n',fp);
+ PUTC('\n',fp);
} else {
for (i = npairs-1; i >= 0; i--) {
this = ptr--;
if (this->aa_e != ' ') {
if (xpos == wraplength) {
- putc('\n',fp);
+ PUTC('\n',fp);
xpos = 0;
}
if (this->aa_e != '*') {
- putc(this->aa_e,fp);
+ PUTC(this->aa_e,fp);
xpos++;
}
}
}
- putc('\n',fp);
+ PUTC('\n',fp);
}
return;
@@ -7874,7 +8062,7 @@ Pair_print_protein_cdna (FILE *fp, struct T *ptr, int npairs, int wraplength, bo
void
-Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion,
+Pair_print_compressed (Filestring_T fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion,
Sequence_T usersegment, int nexons, double fracidentity,
struct T *pairs, int npairs, Chrnum_T chrnum,
Univcoord_T chroffset, Univ_IIT_T chromosome_iit, int querylength_given,
@@ -7904,18 +8092,18 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
querypos1 = start->querypos;
querypos2 = end->querypos;
- fprintf(fp,">%s ",Sequence_accession(queryseq));
+ FPRINTF(fp,">%s ",Sequence_accession(queryseq));
if (dbversion != NULL) {
- fprintf(fp,"%s ",dbversion);
+ FPRINTF(fp,"%s ",dbversion);
} else if (usersegment != NULL && Sequence_accession(usersegment) != NULL) {
- fprintf(fp,"%s ",Sequence_accession(usersegment));
+ FPRINTF(fp,"%s ",Sequence_accession(usersegment));
} else {
- fprintf(fp,"user-provided ");
+ FPRINTF(fp,"user-provided ");
}
#ifdef PMAP
- fprintf(fp,"%d/%d %d %d",pathnum,npaths,(querylength_given+skiplength)*3,nexons);
+ FPRINTF(fp,"%d/%d %d %d",pathnum,npaths,(querylength_given+skiplength)*3,nexons);
coverage = (double) (querypos2 - querypos1 + 1)/(double) ((querylength_given+skiplength)*3);
- fprintf(fp," %.1f",((double) rint(1000.0*coverage)));
+ FPRINTF(fp," %.1f",((double) rint(1000.0*coverage)));
#else
coverage = (double) (querypos2 - querypos1 + 1)/(double) (querylength_given+skiplength);
if (end->querypos + 1 > trim_end) {
@@ -7926,50 +8114,50 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
}
/*
trimmed_coverage = (double) (end->querypos - start->querypos + 1)/(double) (trim_end - trim_start + skiplength);
- fprintf(fp,">%s %s %d/%d %d(%d) %d",
+ FPRINTF(fp,">%s %s %d/%d %d(%d) %d",
Sequence_accession(queryseq),dbversion,pathnum,npaths,
querylength_given+skiplength,trim_end-trim_start,nexons);
- fprintf(fp," %.1f(%.1f)",((double) rint(1000.0*coverage))/10.0,((double) rint(1000.0*trimmed_coverage))/10.0);
+ FPRINTF(fp," %.1f(%.1f)",((double) rint(1000.0*coverage))/10.0,((double) rint(1000.0*trimmed_coverage))/10.0);
*/
- fprintf(fp,"%d/%d %d %d",pathnum,npaths,querylength_given+skiplength,nexons);
- fprintf(fp," %.1f",((double) rint(1000.0*coverage))/10.0);
+ FPRINTF(fp,"%d/%d %d %d",pathnum,npaths,querylength_given+skiplength,nexons);
+ FPRINTF(fp," %.1f",((double) rint(1000.0*coverage))/10.0);
#endif
- fprintf(fp," %.1f",((double) rint(1000.0*fracidentity))/10.0);
+ FPRINTF(fp," %.1f",((double) rint(1000.0*fracidentity))/10.0);
start = &(pairs[0]);
end = &(pairs[npairs-1]);
- fprintf(fp," %d%s%d",start->querypos + ONEBASEDP,"..",end->querypos + ONEBASEDP);
+ FPRINTF(fp," %d%s%d",start->querypos + ONEBASEDP,"..",end->querypos + ONEBASEDP);
chrpos1 = start->genomepos;
chrpos2 = end->genomepos;
position1 = chroffset + chrpos1;
position2 = chroffset + chrpos2;
- fprintf(fp," %u%s%u",position1 + ONEBASEDP,"..",position2 + ONEBASEDP);
+ FPRINTF(fp," %u%s%u",position1 + ONEBASEDP,"..",position2 + ONEBASEDP);
if (chrnum == 0) {
- fprintf(fp," %u%s%u",chrpos1 + ONEBASEDP,"..",chrpos2 + ONEBASEDP);
+ FPRINTF(fp," %u%s%u",chrpos1 + ONEBASEDP,"..",chrpos2 + ONEBASEDP);
} else {
chr = Chrnum_to_string(chrnum,chromosome_iit);
- fprintf(fp," %s:%u%s%u",chr,chrpos1 + ONEBASEDP,"..",chrpos2 + ONEBASEDP);
+ FPRINTF(fp," %s:%u%s%u",chr,chrpos1 + ONEBASEDP,"..",chrpos2 + ONEBASEDP);
FREE(chr);
}
if (chrpos1 <= chrpos2) {
- fprintf(fp," +");
+ FPRINTF(fp," +");
} else {
- fprintf(fp," -");
+ FPRINTF(fp," -");
}
if (cdna_direction > 0) {
- fprintf(fp," dir:sense");
+ FPRINTF(fp," dir:sense");
} else if (cdna_direction < 0) {
- fprintf(fp," dir:antisense");
+ FPRINTF(fp," dir:antisense");
} else {
- fprintf(fp," dir:indet");
+ FPRINTF(fp," dir:indet");
}
if (checksump == true) {
- fprintf(fp," md5:");
+ FPRINTF(fp," md5:");
Sequence_print_digest(fp,queryseq);
}
@@ -7977,23 +8165,23 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
if (chimeraequivpos == chimerapos) {
if (donor_prob > 0.0 && acceptor_prob > 0.0) {
if (chimera_cdna_direction >= 0) {
- fprintf(fp," chimera:%d(>)/%.3f/%.3f",chimerapos + ONEBASEDP,donor_prob,acceptor_prob);
+ FPRINTF(fp," chimera:%d(>)/%.3f/%.3f",chimerapos + ONEBASEDP,donor_prob,acceptor_prob);
} else {
- fprintf(fp," chimera:%d(<)/%.3f/%.3f",chimerapos + ONEBASEDP,donor_prob,acceptor_prob);
+ FPRINTF(fp," chimera:%d(<)/%.3f/%.3f",chimerapos + ONEBASEDP,donor_prob,acceptor_prob);
}
} else {
- fprintf(fp," chimera:%d",chimerapos + ONEBASEDP);
+ FPRINTF(fp," chimera:%d",chimerapos + ONEBASEDP);
}
} else {
- fprintf(fp," chimera:%d..%d",chimerapos + ONEBASEDP,chimeraequivpos + ONEBASEDP);
+ FPRINTF(fp," chimera:%d..%d",chimerapos + ONEBASEDP,chimeraequivpos + ONEBASEDP);
}
}
if (strain != NULL) {
- fprintf(fp," strain:%s",strain);
+ FPRINTF(fp," strain:%s",strain);
}
- putc('\n',fp);
+ PUTC('\n',fp);
for (i = 0; i < npairs; i++) {
/* prev = this; */
@@ -8010,12 +8198,12 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
intron_start = exon_genomeend - 1;
}
- fprintf(fp,"\t%u %u",exon_genomestart,exon_genomeend);
- fprintf(fp," %d %d",exon_querystart,exon_queryend);
+ FPRINTF(fp,"\t%u %u",exon_genomestart,exon_genomeend);
+ FPRINTF(fp," %d %d",exon_querystart,exon_queryend);
if (den == 0) {
- fprintf(fp," 100");
+ FPRINTF(fp," 100");
} else {
- fprintf(fp," %d",(int) floor(100.0*(double) num/(double) den));
+ FPRINTF(fp," %d",(int) floor(100.0*(double) num/(double) den));
}
print_dinucleotide_p = 1;
if (this->comp == FWD_CANONICAL_INTRON_COMP) {
@@ -8050,7 +8238,7 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
tokens = List_reverse(tokens);
print_tokens_compressed(fp,tokens);
List_free(&tokens);
- fprintf(fp,"\t%d",exon_queryend - exon_querystart + 1);
+ FPRINTF(fp,"\t%d",exon_queryend - exon_querystart + 1);
runlength = 0;
donor[0] = this->genome;
@@ -8077,9 +8265,9 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
}
if (i > 0) {
if (intron_end > intron_start) {
- fprintf(fp,"\t%d",intron_end - intron_start + 1);
+ FPRINTF(fp,"\t%d",intron_end - intron_start + 1);
} else {
- fprintf(fp,"\t%d",intron_start - intron_end + 1);
+ FPRINTF(fp,"\t%d",intron_start - intron_end + 1);
}
if (print_dinucleotide_p == -1) {
invert_intron(donor,acceptor);
@@ -8091,15 +8279,15 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
(acceptor[1] == 'G' || acceptor[1] == 'g')) {
/* Do nothing */
} else {
- fprintf(fp,"\t%c%c-%c%c",toupper(donor[0]),toupper(donor[1]),toupper(acceptor[0]),toupper(acceptor[1]));
+ FPRINTF(fp,"\t%c%c-%c%c",toupper(donor[0]),toupper(donor[1]),toupper(acceptor[0]),toupper(acceptor[1]));
}
}
#if 0
if (exon_querystart > exon_queryend + 1) {
- fprintf(fp,"***");
+ FPRINTF(fp,"***");
}
#endif
- putc('\n',fp);
+ PUTC('\n',fp);
}
num = den = 0;
@@ -8156,12 +8344,12 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
exon_queryend = last_querypos + ONEBASEDP;
exon_genomeend = last_genomepos + ONEBASEDP;
- fprintf(fp,"\t%d %d",exon_genomestart,exon_genomeend);
- fprintf(fp," %d %d",exon_querystart,exon_queryend);
+ FPRINTF(fp,"\t%d %d",exon_genomestart,exon_genomeend);
+ FPRINTF(fp," %d %d",exon_querystart,exon_queryend);
if (den == 0) {
- fprintf(fp," 100");
+ FPRINTF(fp," 100");
} else {
- fprintf(fp," %d",(int) floor(100.0*(double) num/(double) den));
+ FPRINTF(fp," %d",(int) floor(100.0*(double) num/(double) den));
}
sprintf(token,"%d*",runlength);
@@ -8170,15 +8358,15 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
print_tokens_compressed(fp,tokens);
List_free(&tokens);
- fprintf(fp,"\t%d",exon_queryend - exon_querystart + 1);
- putc('\n',fp);
+ FPRINTF(fp,"\t%d",exon_queryend - exon_querystart + 1);
+ PUTC('\n',fp);
return;
}
void
-Pair_print_iit_map (FILE *fp, Sequence_T queryseq, char *accession,
+Pair_print_iit_map (Filestring_T fp, Sequence_T queryseq, char *accession,
T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit) {
char *chrstring = NULL;
Chrpos_T chrpos1, chrpos2;
@@ -8192,7 +8380,7 @@ Pair_print_iit_map (FILE *fp, Sequence_T queryseq, char *accession,
/* Made identical to code for Pair_print_iit_exon_map */
chrpos1 = start->genomepos + ONEBASEDP;
chrpos2 = end->genomepos + ONEBASEDP;
- fprintf(fp,">%s %s:%u..%u\n",accession,chrstring,chrpos1,chrpos2);
+ FPRINTF(fp,">%s %s:%u..%u\n",accession,chrstring,chrpos1,chrpos2);
Sequence_print_header(fp,queryseq,/*checksump*/false);
if (chrnum != 0) {
@@ -8204,7 +8392,7 @@ Pair_print_iit_map (FILE *fp, Sequence_T queryseq, char *accession,
void
-Pair_print_iit_exon_map (FILE *fp, struct T *pairs, int npairs, Sequence_T queryseq, char *accession,
+Pair_print_iit_exon_map (Filestring_T fp, struct T *pairs, int npairs, Sequence_T queryseq, char *accession,
T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit) {
int i;
bool in_exon = false;
@@ -8222,7 +8410,7 @@ Pair_print_iit_exon_map (FILE *fp, struct T *pairs, int npairs, Sequence_T query
chrpos1 = start->genomepos + ONEBASEDP;
chrpos2 = end->genomepos + ONEBASEDP;
- fprintf(fp,">%s %s:%u..%u\n",accession,chrstring,chrpos1,chrpos2);
+ FPRINTF(fp,">%s %s:%u..%u\n",accession,chrstring,chrpos1,chrpos2);
Sequence_print_header(fp,queryseq,/*checksump*/false);
for (i = 0; i < npairs; i++) {
@@ -8233,7 +8421,7 @@ Pair_print_iit_exon_map (FILE *fp, struct T *pairs, int npairs, Sequence_T query
if (in_exon == true) {
/* Beginning of gap */
exon_genomeend = last_genomepos + ONEBASEDP;
- fprintf(fp,"%u %u\n",exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"%u %u\n",exon_genomestart,exon_genomeend);
in_exon = false;
}
} else if (this->comp == INTRONGAP_COMP) {
@@ -8254,7 +8442,7 @@ Pair_print_iit_exon_map (FILE *fp, struct T *pairs, int npairs, Sequence_T query
/* prev = this; */
exon_genomeend = last_genomepos + ONEBASEDP;
- fprintf(fp,"%u %u\n",exon_genomestart,exon_genomeend);
+ FPRINTF(fp,"%u %u\n",exon_genomestart,exon_genomeend);
if (chrnum != 0) {
FREE(chrstring);
@@ -8265,7 +8453,7 @@ Pair_print_iit_exon_map (FILE *fp, struct T *pairs, int npairs, Sequence_T query
void
-Pair_print_splicesites (FILE *fp, struct T *pairs, int npairs, char *accession,
+Pair_print_splicesites (Filestring_T fp, struct T *pairs, int npairs, char *accession,
int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, bool watsonp) {
int exoni = 0, i;
bool in_exon = false;
@@ -8289,9 +8477,9 @@ Pair_print_splicesites (FILE *fp, struct T *pairs, int npairs, char *accession,
/* Beginning of gap */
exon_genomeend = last_genomepos + ONEBASEDP;
if (watsonp) {
- fprintf(fp,">%s.exon%d/%d %s:%u..%u donor",accession,exoni,nexons,chrstring,exon_genomeend,exon_genomeend+1U);
+ FPRINTF(fp,">%s.exon%d/%d %s:%u..%u donor",accession,exoni,nexons,chrstring,exon_genomeend,exon_genomeend+1U);
} else {
- fprintf(fp,">%s.exon%d/%d %s:%u..%u donor",accession,exoni,nexons,chrstring,exon_genomeend,exon_genomeend-1U);
+ FPRINTF(fp,">%s.exon%d/%d %s:%u..%u donor",accession,exoni,nexons,chrstring,exon_genomeend,exon_genomeend-1U);
}
in_exon = false;
}
@@ -8306,14 +8494,14 @@ Pair_print_splicesites (FILE *fp, struct T *pairs, int npairs, char *accession,
exon_genomestart = this->genomepos + ONEBASEDP;
if (watsonp) {
intron_length = exon_genomestart - exon_genomeend - 1U;
- fprintf(fp," %u\n",intron_length); /* For previous donor */
- fprintf(fp,">%s.exon%d/%d %s:%u..%u acceptor",accession,exoni,nexons,chrstring,exon_genomestart-1U,exon_genomestart);
- fprintf(fp," %u\n",intron_length);
+ FPRINTF(fp," %u\n",intron_length); /* For previous donor */
+ FPRINTF(fp,">%s.exon%d/%d %s:%u..%u acceptor",accession,exoni,nexons,chrstring,exon_genomestart-1U,exon_genomestart);
+ FPRINTF(fp," %u\n",intron_length);
} else {
intron_length = exon_genomeend - exon_genomestart - 1U;
- fprintf(fp," %u\n",intron_length); /* For previous donor */
- fprintf(fp,">%s.exon%d/%d %s:%u..%u acceptor",accession,exoni,nexons,chrstring,exon_genomestart+1U,exon_genomestart);
- fprintf(fp," %u\n",intron_length);
+ FPRINTF(fp," %u\n",intron_length); /* For previous donor */
+ FPRINTF(fp,">%s.exon%d/%d %s:%u..%u acceptor",accession,exoni,nexons,chrstring,exon_genomestart+1U,exon_genomestart);
+ FPRINTF(fp," %u\n",intron_length);
}
}
@@ -8334,7 +8522,7 @@ Pair_print_splicesites (FILE *fp, struct T *pairs, int npairs, char *accession,
void
-Pair_print_introns (FILE *fp, struct T *pairs, int npairs, char *accession,
+Pair_print_introns (Filestring_T fp, struct T *pairs, int npairs, char *accession,
int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit) {
int exoni = 0, i;
bool in_exon = false;
@@ -8368,7 +8556,7 @@ Pair_print_introns (FILE *fp, struct T *pairs, int npairs, char *accession,
exoni++;
if (exoni > 1) {
exon_genomestart = this->genomepos + ONEBASEDP;
- fprintf(fp,">%s.intron%d/%d %s:%u..%u\n",accession,exoni-1,nexons-1,chrstring,exon_genomeend,exon_genomestart);
+ FPRINTF(fp,">%s.intron%d/%d %s:%u..%u\n",accession,exoni-1,nexons-1,chrstring,exon_genomeend,exon_genomestart);
}
in_exon = true;
@@ -8839,7 +9027,7 @@ Pair_trim_ends (bool *trim5p, bool *trim3p, List_T pairs, int ambig_end_length_5
debug8(printf("Entered trim_ends\n"));
if (pairs == NULL) {
- *trim5p = *trim3p = 0;
+ *trim5p = *trim3p = false;
return (List_T) NULL;
}
diff --git a/src/pair.h b/src/pair.h
index 5ed76d9..5513bd7 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 161598 2015-03-21 02:37:54Z twu $ */
+/* $Id: pair.h 166783 2015-06-02 17:58:02Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -17,6 +17,8 @@ typedef struct Pair_T *Pair_T;
#include "chimera.h"
#include "substring.h" /* For Endtype_T */
#include "sense.h"
+#include "filestring.h"
+
#ifdef GSNAP
#include "resulthr.h" /* For Resulttype_T. Don't call for GMAP, because result.h conflicts */
@@ -24,16 +26,13 @@ typedef struct Pair_T *Pair_T;
#define MATCHESPERGAP 3
-typedef enum {CIGAR_ACTION_IGNORE, CIGAR_ACTION_WARNING, CIGAR_ACTION_ABORT} Cigar_action_T;
-
-
#define T Pair_T
extern void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
- bool md_lowercase_variant_p_in, bool snps_p_in, Univcoord_T genomelength_in,
- Cigar_action_T cigar_action_in);
+ bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
+ Univcoord_T genomelength_in);
extern int
Pair_querypos (T this);
extern Chrpos_T
@@ -81,20 +80,18 @@ Pair_free_out (T *old);
extern int
Pair_translation_length (struct T *pairs, int npairs);
extern void
-Pair_print_continuous (FILE *fp, struct T *pairs, int npairs, bool watsonp,
- bool diagnosticp, bool genomefirstp, int invertmode,
- bool nointronlenp);
+Pair_print_continuous (Filestring_T fp, struct T *pairs, int npairs, bool watsonp,
+ bool genomefirstp, int invertmode, bool nointronlenp);
extern void
-Pair_print_continuous_byexon (FILE *fp, struct T *pairs, int npairs, bool watsonp, bool diagnosticp, int invertmode);
+Pair_print_continuous_byexon (Filestring_T fp, struct T *pairs, int npairs, bool watsonp, int invertmode);
extern void
-Pair_print_alignment (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
+Pair_print_alignment (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
Univcoord_T chroffset, Univ_IIT_T chromosome_iit, bool watsonp,
- bool diagnosticp, int invertmode, bool nointronlenp,
- int wraplength);
+ int invertmode, bool nointronlenp, int wraplength);
extern void
-Pair_print_pathsummary (FILE *fp, int pathnum, T start, T end, Chrnum_T chrnum,
+Pair_print_pathsummary (Filestring_T fp, int pathnum, T start, T end, Chrnum_T chrnum,
Univcoord_T chroffset, Univ_IIT_T chromosome_iit, bool referencealignp,
IIT_T altstrain_iit, char *strain, Univ_IIT_T contig_iit, char *dbversion,
int querylength_given, int skiplength, int trim_start, int trim_end,
@@ -102,20 +99,24 @@ Pair_print_pathsummary (FILE *fp, int pathnum, T start, T end, Chrnum_T chrnum,
int qopens, int qindels, int topens, int tindels, int goodness,
bool watsonp, int cdna_direction,
int translation_start, int translation_end, int translation_length,
- int relaastart, int relaaend, bool maponlyp,
- bool diagnosticp, int stage2_source, int stage2_indexsize);
+ int relaastart, int relaaend, int stage2_source, int stage2_indexsize);
extern void
-Pair_print_coordinates (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
+Pair_print_coordinates (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
Univcoord_T chroffset, Univ_IIT_T chromosome_iit,
bool watsonp, int invertmode);
+extern int
+Pair_cmp (const void *a, const void *b);
+
extern void
Pair_dump_one (T this, bool zerobasedp);
extern void
Pair_dump_list (List_T pairs, bool zerobasedp);
extern void
Pair_dump_array (struct T *pairs, int npairs, bool zerobasedp);
+extern void
+Pair_dump_array_stderr (struct T *pairs, int npairs, bool zerobasedp);
extern Chrpos_T
Pair_genomicpos (struct T *pairs, int npairs, int querypos, bool headp);
extern int
@@ -128,14 +129,19 @@ extern bool
Pair_check_array (struct T *pairs, int npairs);
extern List_T
Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength,
- int clipdir, int hardclip_low, int hardclip_high, bool first_read_p, int queryseq_offset);
+ int hardclip_low, int hardclip_high, int queryseq_offset);
extern void
-Pair_print_exonsummary (FILE *fp, struct T *pairs, int npairs, Chrnum_T chrnum,
+Pair_print_exonsummary (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
Univcoord_T chroffset, Genome_T genome, Univ_IIT_T chromosome_iit,
bool watsonp, int cdna_direction, bool genomefirstp, int invertmode);
extern void
-Pair_print_gff3 (FILE *fp, struct T *pairs, int npairs, int pathnum, char *accession,
+Pair_tokens_free (List_T *tokens);
+extern List_T
+Pair_tokens_copy (List_T old);
+
+extern void
+Pair_print_gff3 (Filestring_T fp, struct T *pairs, int npairs, int pathnum, char *accession,
T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
int translation_end,
int querylength_given, int skiplength, int matches, int mismatches,
@@ -144,19 +150,20 @@ Pair_print_gff3 (FILE *fp, struct T *pairs, int npairs, int pathnum, char *acces
#ifdef GSNAP
extern void
-Pair_print_m8 (FILE *fp, struct T *pairs_querydir, int npairs, bool invertedp,
+Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool invertedp,
Chrnum_T chrnum, Shortread_T queryseq, Shortread_T headerseq,
char *acc_suffix, Univ_IIT_T chromosome_iit);
#endif
extern void
-Pair_print_gsnap (FILE *fp, struct T *pairs, int npairs, int nsegments, bool invertedp,
+Pair_print_gsnap (Filestring_T fp, struct T *pairs, int npairs, int nsegments, bool invertedp,
Endtype_T start_endtype, Endtype_T end_endtype,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
int querylength, bool watsonp, int cdna_direction, int score,
int insertlength, int pairscore, int mapq_score,
Univ_IIT_T chromosome_iit, IIT_T splicesites_iit,
- int *splicesites_divint_crosstable, int donor_typeint, int acceptor_typeint);
+ int *splicesites_divint_crosstable, int donor_typeint, int acceptor_typeint,
+ bool pairedp, GMAP_source_T gmap_source);
extern void
Pair_fix_cdna_direction_array (struct T *pairs_querydir, int npairs, int cdna_direction);
@@ -169,6 +176,8 @@ Pair_guess_cdna_direction (int *sensedir, List_T pairs, bool invertedp,
extern int
Pair_gsnap_nsegments (int *total_nmismatches, int *total_nindels, int *nintrons,
int *nindelbreaks, struct T *pairs, int npairs);
+extern int
+Pair_tokens_cigarlength (List_T tokens);
extern int
@@ -185,13 +194,16 @@ Pair_check_cigar (struct T *pairs, int npairs, int querylength_given,
extern List_T
Pair_clean_cigar (List_T tokens, bool watsonp);
+extern List_T
+Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
+ bool watsonp, int sensedir, int chimera_part);
extern void
-Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
+Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairs, int npairs, List_T cigar_tokens, bool intronp,
char *acc1, char *acc2, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
char *queryseq_ptr, char *quality_string,
int clipdir, int hardclip_low, int hardclip_high, int querylength_given,
- bool watsonp, int cdna_direction, int chimera_part, Chimera_T chimera,
+ bool watsonp, int sensedir, int chimera_part, Chimera_T chimera,
int quality_shift, bool first_read_p, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, Chrpos_T chrpos, Chrpos_T chrlength,
#ifdef GSNAP
@@ -199,21 +211,21 @@ Pair_print_sam (FILE *fp, char *abbrev, struct T *pairs, int npairs,
int pair_mapq_score, int end_mapq_score,
Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
Chrpos_T mate_chrpos, Chrpos_T mate_chrlength,
- int mate_cdna_direction, int pairedlength,
+ int mate_sensedir, int pairedlength,
#else
int mapq_score, bool sam_paired_p,
#endif
- char *sam_read_group_id, bool invertp, bool circularp, bool merged_overlap_p);
+ char *sam_read_group_id, bool invertp, bool circularp, bool merged_overlap_p, bool sarrayp);
extern void
-Pair_print_sam_nomapping (FILE *fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
+Pair_print_sam_nomapping (Filestring_T fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
char *quality_string, int querylength, int quality_shift,
bool first_read_p, bool sam_paired_p, char *sam_read_group_id);
extern Uintlist_T
Pair_exonbounds (struct T *pairs, int npairs, Univcoord_T chroffset);
extern void
-Pair_print_pslformat_nt (FILE *fp, struct T *pairs, int npairs, T start, T end,
+Pair_print_pslformat_nt (Filestring_T fp, struct T *pairs, int npairs, T start, T end,
Sequence_T queryseq, Chrnum_T chrnum,
Univ_IIT_T chromosome_iit, Sequence_T usersegment,
int matches, int unknowns, int mismatches,
@@ -221,26 +233,26 @@ Pair_print_pslformat_nt (FILE *fp, struct T *pairs, int npairs, T start, T end,
extern void
-Pair_print_pslformat_pro (FILE *fp, struct T *pairs, int npairs, T start, T end,
+Pair_print_pslformat_pro (Filestring_T fp, struct T *pairs, int npairs, T start, T end,
Sequence_T queryseq, Chrnum_T chrnum,
Univ_IIT_T chromosome_iit, Sequence_T usersegment,
bool watsonp, int cdna_direction);
extern void
-Pair_print_exons (FILE *fp, struct T *pairs, int npairs, int wraplength, int ngap, bool cdnap);
+Pair_print_exons (Filestring_T fp, struct T *pairs, int npairs, int wraplength, int ngap, bool cdnap);
extern void
-Pair_print_protein_genomic (FILE *fp, struct T *ptr, int npairs, int wraplength, bool forwardp);
+Pair_print_protein_genomic (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp);
#ifdef PMAP
extern void
-Pair_print_nucleotide_cdna (FILE *fp, struct T *ptr, int npairs, int wraplength);
+Pair_print_nucleotide_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength);
#else
extern void
-Pair_print_protein_cdna (FILE *fp, struct T *ptr, int npairs, int wraplength, bool forwardp);
+Pair_print_protein_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp);
#endif
extern void
-Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion,
+Pair_print_compressed (Filestring_T fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion,
Sequence_T usersegment, int nexons, double fracidentity,
struct T *pairs, int npairs, Chrnum_T chrnum,
Univcoord_T chroffset, Univ_IIT_T chromosome_iit, int querylength_given,
@@ -249,16 +261,16 @@ Pair_print_compressed (FILE *fp, int pathnum, int npaths, T start, T end, Sequen
int chimera_cdna_direction, char *strain, bool watsonp, int cdna_direction);
extern void
-Pair_print_iit_map (FILE *fp, Sequence_T queryseq, char *accession,
+Pair_print_iit_map (Filestring_T fp, Sequence_T queryseq, char *accession,
T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit);
extern void
-Pair_print_iit_exon_map (FILE *fp, struct T *pairs, int npairs, Sequence_T queryseq, char *accession,
+Pair_print_iit_exon_map (Filestring_T fp, struct T *pairs, int npairs, Sequence_T queryseq, char *accession,
T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit);
extern void
-Pair_print_splicesites (FILE *fp, struct T *pairs, int npairs, char *accession,
+Pair_print_splicesites (Filestring_T fp, struct T *pairs, int npairs, char *accession,
int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, bool watsonp);
extern void
-Pair_print_introns (FILE *fp, struct T *pairs, int npairs, char *accession,
+Pair_print_introns (Filestring_T fp, struct T *pairs, int npairs, char *accession,
int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit);
extern int
diff --git a/src/pairdef.h b/src/pairdef.h
index 527768d..4a40fc9 100644
--- a/src/pairdef.h
+++ b/src/pairdef.h
@@ -1,6 +1,7 @@
-/* $Id: pairdef.h 115496 2013-11-19 01:08:40Z twu $ */
+/* $Id: pairdef.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef PAIRDEF_INCLUDED
#define PAIRDEF_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
diff --git a/src/parserange.h b/src/parserange.h
index 2f7d92d..013434c 100644
--- a/src/parserange.h
+++ b/src/parserange.h
@@ -1,5 +1,7 @@
+/* $Id: parserange.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef PARSERANGE_INCLUDED
#define PARSERANGE_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "types.h"
diff --git a/src/popcount.c b/src/popcount.c
index 780aa46..09227a0 100644
--- a/src/popcount.c
+++ b/src/popcount.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: popcount.c 157234 2015-01-22 18:59:19Z twu $";
+static char rcsid[] = "$Id: popcount.c 157233 2015-01-22 18:58:26Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/popcount.h b/src/popcount.h
index 88d2244..17802ae 100644
--- a/src/popcount.h
+++ b/src/popcount.h
@@ -1,4 +1,4 @@
-/* $Id: popcount.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: popcount.h 157225 2015-01-22 18:47:23Z twu $ */
#ifndef POPCOUNT_INCLUDED
#define POPCOUNT_INCLUDED
#ifdef HAVE_CONFIG_H
diff --git a/src/reader.h b/src/reader.h
index c4714dc..41d528d 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -1,6 +1,7 @@
-/* $Id: reader.h 57095 2012-02-03 19:43:00Z twu $ */
+/* $Id: reader.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef READER_INCLUDED
#define READER_INCLUDED
+
#include <stdio.h>
#include "bool.h"
diff --git a/src/request.c b/src/request.c
index f16af48..30ea346 100644
--- a/src/request.c
+++ b/src/request.c
@@ -1,9 +1,10 @@
-static char rcsid[] = "$Id: request.c 40330 2011-05-30 17:40:46Z twu $";
+static char rcsid[] = "$Id: request.c 155282 2014-12-12 19:42:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "request.h"
+#include "assert.h"
#include "mem.h"
#define T Request_T
@@ -58,7 +59,8 @@ Request_free (T *old) {
return;
}
-#else
+
+#else /* not GSNAP */
Sequence_T
Request_queryseq (T this) {
@@ -83,5 +85,4 @@ Request_free (T *old) {
return;
}
-#endif
-
+#endif /* not GSNAP */
diff --git a/src/request.h b/src/request.h
index 85fa9e7..c7a75b1 100644
--- a/src/request.h
+++ b/src/request.h
@@ -1,4 +1,4 @@
-/* $Id: request.h 40271 2011-05-28 02:29:18Z twu $ */
+/* $Id: request.h 155282 2014-12-12 19:42:54Z twu $ */
#ifndef REQUEST_INCLUDED
#define REQUEST_INCLUDED
@@ -32,6 +32,7 @@ Request_new (int id, Sequence_T queryseq);
#endif
+
extern void
Request_free (T *old);
diff --git a/src/resulthr.c b/src/resulthr.c
index 9189f0d..3f10925 100644
--- a/src/resulthr.c
+++ b/src/resulthr.c
@@ -1,10 +1,11 @@
-static char rcsid[] = "$Id: resulthr.c 109569 2013-09-30 22:55:30Z twu $";
+static char rcsid[] = "$Id: resulthr.c 155282 2014-12-12 19:42:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "resulthr.h"
#include <stdlib.h>
+#include "assert.h"
#include "mem.h"
#include "stage3hr.h"
@@ -31,6 +32,10 @@ struct T {
int first_absmq2;
int second_absmq2;
double worker_runtime;
+
+ SAM_split_output_type split_output;
+ int strlength;
+ char *string;
};
@@ -119,8 +124,7 @@ Result_worker_runtime (T this) {
T
-Result_single_read_new (int id, void **resultarray, int npaths, int first_absmq, int second_absmq,
- double worker_runtime) {
+Result_single_read_new (int id, void **resultarray, int npaths, int first_absmq, int second_absmq) {
T new = (T) MALLOC_OUT(sizeof(*new));
Stage3end_T stage3end;
@@ -156,14 +160,13 @@ Result_single_read_new (int id, void **resultarray, int npaths, int first_absmq,
new->npaths = npaths;
new->first_absmq = first_absmq;
new->second_absmq = second_absmq;
- new->worker_runtime = worker_runtime;
return new;
}
T
Result_paired_read_new (int id, void **resultarray, int npaths, int first_absmq, int second_absmq,
- Pairtype_T final_pairtype, double worker_runtime) {
+ Pairtype_T final_pairtype) {
T new = (T) MALLOC_OUT(sizeof(*new));
Stage3pair_T stage3pair;
@@ -211,15 +214,13 @@ Result_paired_read_new (int id, void **resultarray, int npaths, int first_absmq,
new->npaths = npaths;
new->first_absmq = first_absmq;
new->second_absmq = second_absmq;
- new->worker_runtime = worker_runtime;
return new;
}
T
Result_paired_as_singles_new (int id, void **hits5, int npaths5, int first_absmq5, int second_absmq5,
- void **hits3, int npaths3, int first_absmq3, int second_absmq3,
- double worker_runtime) {
+ void **hits3, int npaths3, int first_absmq3, int second_absmq3) {
T new = (T) MALLOC_OUT(sizeof(*new));
Stage3end_T stage3end_5, stage3end_3;
@@ -271,7 +272,6 @@ Result_paired_as_singles_new (int id, void **hits5, int npaths5, int first_absmq
new->npaths2 = npaths3;
new->first_absmq2 = first_absmq3;
new->second_absmq2 = second_absmq3;
- new->worker_runtime = worker_runtime;
return new;
}
@@ -321,3 +321,4 @@ Result_free (T *old) {
}
+
diff --git a/src/resulthr.h b/src/resulthr.h
index 926b312..c205ce2 100644
--- a/src/resulthr.h
+++ b/src/resulthr.h
@@ -1,7 +1,9 @@
-/* $Id: resulthr.h 91116 2013-04-02 19:50:33Z twu $ */
+/* $Id: resulthr.h 155282 2014-12-12 19:42:54Z twu $ */
#ifndef RESULTHR_INCLUDED
#define RESULTHR_INCLUDED
+
#include "bool.h"
+#include "samflags.h" /* for SAM_split_output_type */
/* PAIRED_UNSPECIFIED assigned only by Stage1hr_paired_read */
typedef enum {CONCORDANT, PAIRED_UNSPECIFIED, PAIRED_INVERSION, PAIRED_SCRAMBLE, PAIRED_TOOLONG,
@@ -14,6 +16,7 @@ typedef enum {SINGLEEND_NOMAPPING, PAIREDEND_NOMAPPING,
HALFMAPPING_UNIQ, HALFMAPPING_TRANSLOC, HALFMAPPING_MULT,
UNPAIRED_UNIQ, UNPAIRED_TRANSLOC, UNPAIRED_MULT} Resulttype_T;
+
#define T Result_T
typedef struct T *T;
@@ -31,18 +34,14 @@ extern void **
Result_array (int *npaths, int *first_absmq, int *second_absmq, T this);
extern void **
Result_array2 (int *npaths, int *first_absmq, int *second_absmq, T this);
-extern double
-Result_worker_runtime (T this);
extern T
-Result_single_read_new (int id, void **resultarray, int npaths, int first_absmq, int second_absmq,
- double worker_runtime);
+Result_single_read_new (int id, void **resultarray, int npaths, int first_absmq, int second_absmq);
extern T
Result_paired_read_new (int id, void **resultarray, int npaths, int first_absmq, int second_absmq,
- Pairtype_T final_pairtype, double worker_runbtime);
+ Pairtype_T final_pairtype);
extern T
Result_paired_as_singles_new (int id, void **hits5, int npaths5, int first_absmq5, int second_absmq5,
- void **hits3, int npaths3, int first_absmq3, int second_absmq3,
- double worker_runtime);
+ void **hits3, int npaths3, int first_absmq3, int second_absmq3);
extern void
Result_free (T *old);
diff --git a/src/sam_sort.c b/src/sam_sort.c
index 97c9795..9438ad7 100644
--- a/src/sam_sort.c
+++ b/src/sam_sort.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sam_sort.c 154454 2014-12-02 19:30:27Z twu $";
+static char rcsid[] = "$Id: sam_sort.c 155408 2014-12-16 07:00:44Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -23,6 +23,7 @@ static char rcsid[] = "$Id: sam_sort.c 154454 2014-12-02 19:30:27Z twu $";
#include "samflags.h"
#include "stopwatch.h"
#include "datadir.h"
+#include "filestring.h"
#include "getopt.h"
@@ -128,7 +129,7 @@ static bool multiple_primaries_p = false;
static Stopwatch_T stopwatch = NULL;
-static char *sevenway_root = NULL;
+static char *split_output_root = NULL;
static bool appendp = false;
static FILE **outputs = NULL;
@@ -234,12 +235,13 @@ struct T {
Univcoord_T genomicpos;
Univcoord_T genomicpos_extend_softclip;
Univcoord_T mate_genomicpos;
+ int filei;
off_t linestart;
int linelen;
char *acc; /* Needed for ACC_SECONDARY_SORT */
- int readindex; /* Needed only for marking duplicates to find the other queryseq */
+ int readindex; /* inputi or outputi. Needed for marking duplicates to find the other queryseq */
char *queryseq5;
char *queryseq3;
@@ -275,8 +277,8 @@ Cell_standardize_queryseqs (T this) {
/* initial_softclip needs to be determined only if we are marking duplicates */
static void
Cell_fill (struct T *this, int readindex, unsigned int flag, SAM_split_output_type split_output,
- bool query_lowp, int initial_softclip, Univcoord_T genomicpos, off_t fileposition,
- int linelen) {
+ bool query_lowp, int initial_softclip, Univcoord_T genomicpos,
+ int filei, off_t fileposition, int linelen) {
this->readindex = readindex;
@@ -287,6 +289,7 @@ Cell_fill (struct T *this, int readindex, unsigned int flag, SAM_split_output_ty
this->genomicpos = genomicpos;
this->genomicpos_extend_softclip = this->genomicpos - initial_softclip;
+ this->filei = filei;
this->linestart = fileposition;
this->linelen = linelen;
@@ -299,7 +302,7 @@ Cell_fill (struct T *this, int readindex, unsigned int flag, SAM_split_output_ty
/* initial_softclip needs to be determined only if we are marking duplicates */
static void
Cell_fill_nodups (struct T *this, unsigned int flag, SAM_split_output_type split_output,
- Univcoord_T genomicpos, off_t fileposition, int linelen) {
+ Univcoord_T genomicpos, int filei, off_t fileposition, int linelen) {
this->readindex = 0;
@@ -310,6 +313,7 @@ Cell_fill_nodups (struct T *this, unsigned int flag, SAM_split_output_type split
this->genomicpos = genomicpos;
this->genomicpos_extend_softclip = genomicpos;
+ this->filei = filei;
this->linestart = fileposition;
this->linelen = linelen;
@@ -341,16 +345,40 @@ print_fromfile (FILE *fp, off_t fileposition, int linelength) {
#endif
+
static void
-Cell_print_fromfile (FILE *fp_input, T this) {
+Cell_print_fromfile (FILE *fp_input, T this, Filestring_T headers) {
char buffer[CHUNK];
int linelength = this->linelen;
FILE *fp_output;
- if (outputs == NULL) {
- fp_output = stdout;
+#if 0
+ if (nofailsp == true && this->split_output == OUTPUT_NM) {
+ /* Skip */
+ return;
+
+ } else if (failsonlyp == true && this->split_output != OUTPUT_NM &&
+ this->split_output != OUTPUT_HX && this->split_output != OUTPUT_UX &&
+ this->split_output != OUTPUT_PX && this->split_output != OUTPUT_CX) {
+ return;
+ }
+
+ if (failedinput_root != NULL && primaryp(this->flag) == true) {
+ /* Convert SAM line to FASTA or FASTQ and write to a failedinput file */
+ }
+#endif
+
+ if (split_output_root == NULL) {
+ if ((fp_output = outputs[0]) == NULL) {
+ fp_output = outputs[0] = stdout;
+ Filestring_print(fp_output,headers);
+ }
+
} else {
- fp_output = outputs[this->split_output];
+ if ((fp_output = outputs[this->split_output]) == NULL) {
+ fp_output = outputs[this->split_output] = SAM_header_open_file(this->split_output,split_output_root,/*appendp*/false);
+ Filestring_print(fp_output,headers);
+ }
}
moveto(fp_input,this->linestart);
@@ -553,9 +581,12 @@ Cell_find (int lowi, int highi, T *cells, Univcoord_T goal, int readindex) {
static void
-process_without_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncells,
- Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets) {
+process_without_dups (FILE **sam_inputs, int *headerlengths, int *ncells, int ninputs,
+ Intlist_T linelengths, int ncells_total, Univ_IIT_T chromosome_iit,
+ Univcoord_T *chroffsets, Filestring_T headers) {
T *cells;
+ FILE *fp_sam;
+ int filei, linei;
int n_mappers = 0, n_nomappers = 0;
Intlist_T l;
struct T *cells_allocated, *ptr;
@@ -569,61 +600,72 @@ process_without_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int nc
int acclength;
- ptr = cells_allocated = (struct T *) MALLOC(ncells * sizeof(struct T));
- cells = (T *) MALLOC(ncells * sizeof(T));
- for (i = 0; i < ncells; i++) {
+ ptr = cells_allocated = (struct T *) MALLOC(ncells_total * sizeof(struct T));
+ cells = (T *) MALLOC(ncells_total * sizeof(T));
+ for (i = 0; i < ncells_total; i++) {
cells[i] = &(ptr[i]);
}
+ fprintf(stderr,"Reading SAM files...\n");
+
k = 0;
- fileposition = headerlen;
- for (l = linelengths; l != NULL; l = Intlist_next(l)) {
- linelen = Intlist_head(l);
- moveto(fp_sam,fileposition);
- genomicpos = Samread_parse_genomicpos_fromfile(fp_sam,&flag,&split_output,
- chromosome_iit,chroffsets,linelen);
- Cell_fill_nodups(cells[k++],flag,split_output,genomicpos,fileposition,linelen);
- if (flag & QUERY_UNMAPPED) {
- n_nomappers++;
- } else {
- n_mappers++;
+ l = linelengths;
+ for (filei = 0; filei < ninputs; filei++) {
+ fprintf(stderr," Reading file %d...",filei+1);
+ fp_sam = sam_inputs[filei];
+ fileposition = headerlengths[filei];
+ for (linei = 0; linei < ncells[filei]; linei++) {
+ linelen = Intlist_head(l);
+ moveto(fp_sam,fileposition);
+ genomicpos = Samread_parse_genomicpos_fromfile(fp_sam,&flag,&split_output,
+ chromosome_iit,chroffsets,linelen);
+ Cell_fill_nodups(cells[k++],flag,split_output,genomicpos,filei,fileposition,linelen);
+ if (flag & QUERY_UNMAPPED) {
+ n_nomappers++;
+ } else {
+ n_mappers++;
+ }
+ fileposition += linelen;
+ l = Intlist_next(l);
}
- fileposition += linelen;
- }
+ fprintf(stderr,"done\n");
+ }
/* Sort and print */
if (secondary_sort_method == NO_SECONDARY_SORT) {
Stopwatch_start(stopwatch);
fprintf(stderr,"Sorting entries by genomicpos...");
- qsort(cells,ncells,sizeof(T),Cell_genomicpos_cmp);
+ qsort(cells,ncells_total,sizeof(T),Cell_genomicpos_cmp);
fprintf(stderr,"done (%.1f seconds)\n",Stopwatch_stop(stopwatch));
Stopwatch_start(stopwatch);
fprintf(stderr,"Printing entries...");
- for (k = 0; k < ncells; k++) {
+ for (k = 0; k < ncells_total; k++) {
debug(printf("%u\t%u\t%d\n",cells[k]->genomicpos,cells[k]->linestart,cells[k]->linelen));
- Cell_print_fromfile(fp_sam,cells[k]);
+ fp_sam = sam_inputs[cells[k]->filei];
+ Cell_print_fromfile(fp_sam,cells[k],headers);
}
fprintf(stderr,"done (%.1f seconds)\n",Stopwatch_stop(stopwatch));
} else if (secondary_sort_method == ORIG_SECONDARY_SORT) {
Stopwatch_start(stopwatch);
fprintf(stderr,"Sorting entries by genomicpos and original file position...");
- qsort(cells,ncells,sizeof(T),Cell_genomicpos_linestart_cmp);
+ qsort(cells,ncells_total,sizeof(T),Cell_genomicpos_linestart_cmp);
fprintf(stderr,"done (%.1f seconds)\n",Stopwatch_stop(stopwatch));
Stopwatch_start(stopwatch);
fprintf(stderr,"Printing entries...");
- for (k = 0; k < ncells; k++) {
- Cell_print_fromfile(fp_sam,cells[k]);
+ for (k = 0; k < ncells_total; k++) {
+ fp_sam = sam_inputs[cells[k]->filei];
+ Cell_print_fromfile(fp_sam,cells[k],headers);
}
fprintf(stderr,"done (%.1f seconds)\n",Stopwatch_stop(stopwatch));
} else if (secondary_sort_method == ACC_SECONDARY_SORT) {
Stopwatch_start(stopwatch);
fprintf(stderr,"Sorting entries by genomicpos...");
- qsort(cells,ncells,sizeof(T),Cell_genomicpos_cmp);
+ qsort(cells,ncells_total,sizeof(T),Cell_genomicpos_cmp);
fprintf(stderr,"done (%.1f seconds)\n",Stopwatch_stop(stopwatch));
Stopwatch_start(stopwatch);
@@ -637,6 +679,7 @@ process_without_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int nc
if (j > i + 1) {
for (k = i; k < j; k++) {
+ fp_sam = sam_inputs[cells[k]->filei];
moveto(fp_sam,cells[k]->linestart);
cells[k]->acc = Samread_get_acc_fromfile(&acclength,fp_sam,cells[k]->linelen);
}
@@ -648,33 +691,36 @@ process_without_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int nc
}
for (k = i; k < j; k++) {
- Cell_print_fromfile(fp_sam,cells[k]);
+ fp_sam = sam_inputs[cells[k]->filei];
+ Cell_print_fromfile(fp_sam,cells[k],headers);
}
i = j;
}
- if (ncells > n_mappers + 1) {
- for (k = n_mappers; k < ncells; k++) {
+ if (ncells_total > n_mappers + 1) {
+ for (k = n_mappers; k < ncells_total; k++) {
+ fp_sam = sam_inputs[cells[k]->filei];
moveto(fp_sam,cells[k]->linestart);
cells[k]->acc = Samread_get_acc_fromfile(&acclength,fp_sam,cells[k]->linelen);
}
qsort(&(cells[n_mappers]),n_nomappers,sizeof(T),Cell_accession_cmp);
- for (k = n_mappers; k < ncells; k++) {
+ for (k = n_mappers; k < ncells_total; k++) {
FREE(cells[k]->acc);
}
}
- for (k = n_mappers; k < ncells; k++) {
- Cell_print_fromfile(fp_sam,cells[k]);
+ for (k = n_mappers; k < ncells_total; k++) {
+ fp_sam = sam_inputs[cells[k]->filei];
+ Cell_print_fromfile(fp_sam,cells[k],headers);
}
fprintf(stderr,"done (%.1f seconds)\n",Stopwatch_stop(stopwatch));
} else if (secondary_sort_method == MATEFWD_SECONDARY_SORT || secondary_sort_method == MATEREV_SECONDARY_SORT) {
Stopwatch_start(stopwatch);
fprintf(stderr,"Sorting entries by genomicpos...");
- qsort(cells,ncells,sizeof(T),Cell_genomicpos_cmp);
+ qsort(cells,ncells_total,sizeof(T),Cell_genomicpos_cmp);
fprintf(stderr,"done (%.1f seconds)\n",Stopwatch_stop(stopwatch));
Stopwatch_start(stopwatch);
@@ -700,14 +746,15 @@ process_without_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int nc
}
for (k = i; k < j; k++) {
- Cell_print_fromfile(fp_sam,cells[k]);
+ fp_sam = sam_inputs[cells[k]->filei];
+ Cell_print_fromfile(fp_sam,cells[k],headers);
}
i = j;
}
- if (ncells > n_mappers + 1) {
- for (k = n_mappers; k < ncells; k++) {
+ if (ncells_total > n_mappers + 1) {
+ for (k = n_mappers; k < ncells_total; k++) {
moveto(fp_sam,cells[k]->linestart);
cells[k]->mate_genomicpos = Samread_parse_mate_genomicpos_fromfile(fp_sam,chromosome_iit,chroffsets,cells[k]->linelen);
}
@@ -719,8 +766,9 @@ process_without_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int nc
}
}
- for (k = n_mappers; k < ncells; k++) {
- Cell_print_fromfile(fp_sam,cells[k]);
+ for (k = n_mappers; k < ncells_total; k++) {
+ fp_sam = sam_inputs[cells[k]->filei];
+ Cell_print_fromfile(fp_sam,cells[k],headers);
}
fprintf(stderr,"done (%.1f seconds)\n",Stopwatch_stop(stopwatch));
@@ -737,8 +785,11 @@ process_without_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int nc
static int
-process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncells,
- Univ_IIT_T chromosome_iit, Univcoord_T *chroffsets) {
+process_with_dups (FILE **sam_inputs, int *headerlengths, int *ncells, int ninputs,
+ Intlist_T linelengths, int ncells_total, Univ_IIT_T chromosome_iit,
+ Univcoord_T *chroffsets, Filestring_T headers) {
+ FILE *fp_sam;
+ int filei, linei;
int nmarked = 0;
int n_mappers = 0, n_nomappers = 0;
T *cells, mate;
@@ -766,12 +817,12 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
/* Actually, array lengths should be nreads, but we don't know that yet */
- queryseq5_index = (int *) CALLOC(ncells,sizeof(int));
- queryseq3_index = (int *) CALLOC(ncells,sizeof(int));
+ queryseq5_index = (int *) CALLOC(ncells_total,sizeof(int));
+ queryseq3_index = (int *) CALLOC(ncells_total,sizeof(int));
- ptr = cells_allocated = (struct T *) MALLOC(ncells * sizeof(struct T));
- cells = (T *) MALLOC(ncells * sizeof(T));
- for (i = 0; i < ncells; i++) {
+ ptr = cells_allocated = (struct T *) MALLOC(ncells_total * sizeof(struct T));
+ cells = (T *) MALLOC(ncells_total * sizeof(T));
+ for (i = 0; i < ncells_total; i++) {
cells[i] = &(ptr[i]);
}
@@ -781,58 +832,68 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
last_acclength = 0;
readindex = -1; /* readindex is 0-based */
+ fprintf(stderr,"Reading SAM files...\n");
+
k = 0;
- fileposition = headerlen;
- for (l = linelengths; l != NULL; l = Intlist_next(l)) {
- linelen = Intlist_head(l);
- moveto(fp_sam,fileposition);
- acc = Samread_parse_acc_and_softclip_fromfile(&acclength,&flag,&split_output,&hiti,
- &genomicpos,&initial_softclip,&query_lowp,
- fp_sam,chromosome_iit,chroffsets,linelen);
- if (acclength != last_acclength) {
- readindex++;
- } else if (strcmp(acc,last_acc)) {
- readindex++;
- }
- FREE(last_acc);
- last_acc = acc;
- last_acclength = acclength;
+ l = linelengths;
+ for (filei = 0; filei < ninputs; filei++) {
+ fprintf(stderr," Reading file %d...",filei+1);
+ fp_sam = sam_inputs[filei];
+ fileposition = headerlengths[filei];
+ for (linei = 0; linei < ncells[filei]; linei++) {
+ linelen = Intlist_head(l);
+ moveto(fp_sam,fileposition);
+ acc = Samread_parse_acc_and_softclip_fromfile(&acclength,&flag,&split_output,&hiti,
+ &genomicpos,&initial_softclip,&query_lowp,
+ fp_sam,chromosome_iit,chroffsets,linelen);
+ if (acclength != last_acclength) {
+ readindex++;
+ } else if (strcmp(acc,last_acc)) {
+ readindex++;
+ }
+ FREE(last_acc);
+ last_acc = acc;
+ last_acclength = acclength;
- if (flag & QUERY_UNMAPPED) {
- n_nomappers++;
- } else {
- n_mappers++;
- }
+ if (flag & QUERY_UNMAPPED) {
+ n_nomappers++;
+ } else {
+ n_mappers++;
+ }
- /* debug(printf("Read readindex %d, chrnum %d, chrpos %u, linelen %d\n",readindex,chrnum,chrpos,linelen)); */
- if (flag & NOT_PRIMARY) {
+ /* debug(printf("Read readindex %d, chrnum %d, chrpos %u, linelen %d\n",readindex,chrnum,chrpos,linelen)); */
+ if (flag & NOT_PRIMARY) {
/* Don't use secondary hit for accessing reads */
- } else if (multiple_primaries_p == true) {
+ } else if (multiple_primaries_p == true) {
#if 0
- /* Now always parsed */
- hiti = Samread_parse_aux_fromfile(fp_sam,/*auxfield*/"HI",linelen);
+ /* Now always parsed */
+ hiti = Samread_parse_aux_fromfile(fp_sam,/*auxfield*/"HI",linelen);
#endif
- if (strcmp(hiti,"1")) {
- /* Don't use second or later primary hit for accessing reads */
- } else if (flag & FIRST_READ_P) {
- queryseq5_index[readindex] = k;
+ if (strcmp(hiti,"1")) {
+ /* Don't use second or later primary hit for accessing reads */
+ } else if (flag & FIRST_READ_P) {
+ queryseq5_index[readindex] = k;
+ } else {
+ queryseq3_index[readindex] = k;
+ }
+
} else {
- queryseq3_index[readindex] = k;
+ if (flag & FIRST_READ_P) {
+ queryseq5_index[readindex] = k;
+ } else {
+ queryseq3_index[readindex] = k;
+ }
}
+
+ FREE(hiti);
+ Cell_fill(cells[k++],readindex,flag,split_output,query_lowp,initial_softclip,genomicpos,filei,fileposition,linelen);
- } else {
- if (flag & FIRST_READ_P) {
- queryseq5_index[readindex] = k;
- } else {
- queryseq3_index[readindex] = k;
- }
+ fileposition += linelen;
+ l = Intlist_next(l);
}
- FREE(hiti);
- Cell_fill(cells[k++],readindex,flag,split_output,query_lowp,initial_softclip,genomicpos,fileposition,linelen);
-
- fileposition += linelen;
+ fprintf(stderr,"done\n");
}
FREE(last_acc);
@@ -842,7 +903,7 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
/* Sort entries, based on genomicpos_extend_softclip */
Stopwatch_start(stopwatch);
fprintf(stderr,"Sorting SAM lines...");
- qsort(cells,ncells,sizeof(T),Cell_genomicpos_extend_softclip_lowhigh_cmp);
+ qsort(cells,ncells_total,sizeof(T),Cell_genomicpos_extend_softclip_lowhigh_cmp);
fprintf(stderr,"done (%.1f seconds)\n",Stopwatch_stop(stopwatch));
/* Mark all duplicates within mappers, based on genomicpos_extend_softclip */
@@ -869,8 +930,9 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
/* Multiple low hits with same chrpos, so opportunity to mark duplicatep */
/* Find queryseqs for each */
for (k = i; k < j_low; k++) {
+ fp_sam = sam_inputs[cells[k]->filei];
debug9(printf("Looking for queryseqs for "));
- debug9(Cell_print_fromfile(fp_sam,cells[k]));
+ debug9(Cell_print_fromfile(fp_sam,cells[k],headers));
if (cells[k]->flag & FIRST_READ_P) {
debug9(printf("Flag for entry %d is %u, indicating a first read\n",k,cells[k]->flag));
@@ -885,8 +947,9 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
mate_allocated = queryseq3_index[cells[k]->readindex];
mate = &(cells_allocated[mate_allocated]);
+
debug9(printf("Mate is "));
- debug9(Cell_print_fromfile(fp_sam,mate));
+ debug9(Cell_print_fromfile(fp_sam,mate,headers));
moveto(fp_sam,mate->linestart);
Samread_parse_read_fromfile(fp_sam,&flag,&readlength,&read,mate->linelen);
if (mate->flag & QUERY_MINUSP) {
@@ -910,7 +973,7 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
mate_allocated = queryseq5_index[cells[k]->readindex];
mate = &(cells_allocated[mate_allocated]);
debug9(printf("Mate is "));
- debug9(Cell_print_fromfile(fp_sam,mate));
+ debug9(Cell_print_fromfile(fp_sam,mate,headers));
moveto(fp_sam,mate->linestart);
Samread_parse_read_fromfile(fp_sam,&flag,&readlength,&read,mate->linelen);
if (mate->flag & QUERY_MINUSP) {
@@ -965,8 +1028,9 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
/* Multiple high hits with same chrpos, so opportunity to mark duplicatep */
/* Find queryseqs for each */
for (k = j_low; k < j_high; k++) {
+ fp_sam = sam_inputs[cells[k]->filei];
debug9(printf("Looking for queryseqs for "));
- debug9(Cell_print_fromfile(fp_sam,cells[k]));
+ debug9(Cell_print_fromfile(fp_sam,cells[k],headers));
if (cells[k]->flag & FIRST_READ_P) {
debug9(printf("Flag for entry %d is %u, indicating a first read\n",k,cells[k]->flag));
@@ -982,7 +1046,7 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
mate_allocated = queryseq3_index[cells[k]->readindex];
mate = &(cells_allocated[mate_allocated]);
debug9(printf("Mate is "));
- debug9(Cell_print_fromfile(fp_sam,mate));
+ debug9(Cell_print_fromfile(fp_sam,mate,headers));
moveto(fp_sam,mate->linestart);
Samread_parse_read_fromfile(fp_sam,&flag,&readlength,&read,mate->linelen);
if (mate->flag & QUERY_MINUSP) {
@@ -1006,7 +1070,7 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
mate_allocated = queryseq5_index[cells[k]->readindex];
mate = &(cells_allocated[mate_allocated]);
debug9(printf("Mate is "));
- debug9(Cell_print_fromfile(fp_sam,mate));
+ debug9(Cell_print_fromfile(fp_sam,mate,headers));
moveto(fp_sam,mate->linestart);
Samread_parse_read_fromfile(fp_sam,&flag,&readlength,&read,mate->linelen);
if (mate->flag & QUERY_MINUSP) {
@@ -1047,11 +1111,12 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
}
/* Mark all duplicates within nomappers, based on queryseq */
- for (k = n_mappers; k < ncells; k++) {
+ for (k = n_mappers; k < ncells_total; k++) {
if (duplicatep[cells[k]->readindex] == true) {
cells[k]->queryseq5 = cells[k]->queryseq3 = NULL; /* Will be sorted to end of list */
} else if (cells[k]->flag & FIRST_READ_P) {
+ fp_sam = sam_inputs[cells[k]->filei];
moveto(fp_sam,cells[k]->linestart);
Samread_parse_read_fromfile(fp_sam,&flag,&readlength,&read,cells[k]->linelen);
if (cells[k]->flag & QUERY_MINUSP) {
@@ -1071,6 +1136,7 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
Cell_standardize_queryseqs(cells[k]);
} else {
+ fp_sam = sam_inputs[cells[k]->filei];
moveto(fp_sam,cells[k]->linestart);
Samread_parse_read_fromfile(fp_sam,&flag,&readlength,&read,cells[k]->linelen);
if (cells[k]->flag & QUERY_MINUSP) {
@@ -1098,7 +1164,7 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
/* Sort non-mapping entries based on queryseqs */
qsort(&(cells[n_mappers]),n_nomappers,sizeof(T),Cell_queryseq_cmp);
- for (k = n_mappers + 1; k < ncells; k++) {
+ for (k = n_mappers + 1; k < ncells_total; k++) {
debug(printf("Comparing cell %d with %d => cmp %d\n",k,k-1,Cell_queryseq_cmp(&(cells[k]),&(cells[k-1]))));
if (Cell_queryseq_cmp(&(cells[k]),&(cells[k-1])) == 0) {
readindex = cells[k]->readindex;
@@ -1110,7 +1176,7 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
}
}
- for (k = n_mappers; k < ncells; k++) {
+ for (k = n_mappers; k < ncells_total; k++) {
FREE(cells[k]->queryseq5);
FREE(cells[k]->queryseq3);
}
@@ -1121,23 +1187,24 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
Stopwatch_start(stopwatch);
fprintf(stderr,"Re-sorting entries...");
if (secondary_sort_method == NO_SECONDARY_SORT) {
- qsort(cells,ncells,sizeof(T),Cell_genomicpos_cmp);
+ qsort(cells,ncells_total,sizeof(T),Cell_genomicpos_cmp);
} else if (secondary_sort_method == ORIG_SECONDARY_SORT) {
- qsort(cells,ncells,sizeof(T),Cell_genomicpos_linestart_cmp);
+ qsort(cells,ncells_total,sizeof(T),Cell_genomicpos_linestart_cmp);
} else if (secondary_sort_method == ACC_SECONDARY_SORT) {
- qsort(cells,ncells,sizeof(T),Cell_genomicpos_cmp);
+ qsort(cells,ncells_total,sizeof(T),Cell_genomicpos_cmp);
i = 0;
- while (i < ncells) {
+ while (i < ncells_total) {
j = i + 1;
- while (j < ncells && cells[j]->genomicpos == cells[i]->genomicpos) {
+ while (j < ncells_total && cells[j]->genomicpos == cells[i]->genomicpos) {
j++;
}
if (j > i + 1) {
for (k = i; k < j; k++) {
+ fp_sam = sam_inputs[cells[k]->filei];
moveto(fp_sam,cells[k]->linestart);
cells[k]->acc = Samread_get_acc_fromfile(&acclength,fp_sam,cells[k]->linelen);
}
@@ -1152,17 +1219,18 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
} else if (secondary_sort_method == MATEFWD_SECONDARY_SORT ||
secondary_sort_method == MATEREV_SECONDARY_SORT) {
- qsort(cells,ncells,sizeof(T),Cell_genomicpos_cmp);
+ qsort(cells,ncells_total,sizeof(T),Cell_genomicpos_cmp);
i = 0;
- while (i < ncells) {
+ while (i < ncells_total) {
j = i + 1;
- while (j < ncells && cells[j]->genomicpos == cells[i]->genomicpos) {
+ while (j < ncells_total && cells[j]->genomicpos == cells[i]->genomicpos) {
j++;
}
if (j > i + 1) {
for (k = i; k < j; k++) {
+ fp_sam = sam_inputs[cells[k]->filei];
moveto(fp_sam,cells[k]->linestart);
cells[k]->mate_genomicpos = Samread_parse_mate_genomicpos_fromfile(fp_sam,chromosome_iit,chroffsets,cells[k]->linelen);
}
@@ -1182,9 +1250,10 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
Stopwatch_start(stopwatch);
fprintf(stderr,"Printing results...");
- for (k = 0; k < ncells; k++) {
+ for (k = 0; k < ncells_total; k++) {
if (duplicatep[cells[k]->readindex] == true) {
if (print_duplicates_p == true) {
+ fp_sam = sam_inputs[cells[k]->filei];
moveto(fp_sam,cells[k]->linestart);
Samread_print_as_duplicate_fromfile(fp_sam,cells[k]->linelen);
}
@@ -1192,7 +1261,8 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
} else {
if (print_unique_p == true) {
/* Non-duplicate */
- Cell_print_fromfile(fp_sam,cells[k]);
+ fp_sam = sam_inputs[cells[k]->filei];
+ Cell_print_fromfile(fp_sam,cells[k],headers);
}
}
}
@@ -1207,222 +1277,13 @@ process_with_dups (FILE *fp_sam, int headerlen, Intlist_T linelengths, int ncell
}
-/* output 0 is stdout */
-#define N_SPLIT_OUTPUTS 22
-
-static void
-split_output_open (char *sevenway_root, bool appendp) {
- char *filename;
- char *write_mode;
-
- if (appendp == true) {
- write_mode = "a";
- } else {
- write_mode = "w";
- }
-
- outputs = (FILE **) MALLOC((1+N_SPLIT_OUTPUTS) * sizeof(FILE *));
- outputs[OUTPUT_NONE] = stdout;
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".nomapping")+1,sizeof(char));
- sprintf(filename,"%s.nomapping",sevenway_root);
- if ((outputs[OUTPUT_NM] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_uniq")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_uniq",sevenway_root);
- if ((outputs[OUTPUT_HU] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_circular")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_circular",sevenway_root);
- if ((outputs[OUTPUT_HC] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_transloc")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_transloc",sevenway_root);
- if ((outputs[OUTPUT_HT] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_mult")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_mult",sevenway_root);
- if ((outputs[OUTPUT_HM] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".halfmapping_mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.halfmapping_mult_xs",sevenway_root);
- if ((outputs[OUTPUT_HX] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_uniq")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_uniq",sevenway_root);
- if ((outputs[OUTPUT_UU] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_circular")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_circular",sevenway_root);
- if ((outputs[OUTPUT_UC] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_transloc")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_transloc",sevenway_root);
- if ((outputs[OUTPUT_UT] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_mult")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_mult",sevenway_root);
- if ((outputs[OUTPUT_UM] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".unpaired_mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.unpaired_mult_xs",sevenway_root);
- if ((outputs[OUTPUT_UX] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_uniq")+1,sizeof(char));
- sprintf(filename,"%s.concordant_uniq",sevenway_root);
- if ((outputs[OUTPUT_CU] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_circular")+1,sizeof(char));
- sprintf(filename,"%s.concordant_circular",sevenway_root);
- if ((outputs[OUTPUT_CC] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_transloc")+1,sizeof(char));
- sprintf(filename,"%s.concordant_transloc",sevenway_root);
- if ((outputs[OUTPUT_CT] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_mult")+1,sizeof(char));
- sprintf(filename,"%s.concordant_mult",sevenway_root);
- if ((outputs[OUTPUT_CM] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".concordant_mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.concordant_mult_xs",sevenway_root);
- if ((outputs[OUTPUT_CX] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_uniq_circular")+1,sizeof(char));
- sprintf(filename,"%s.paired_uniq_circular",sevenway_root);
- if ((outputs[OUTPUT_PC] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_uniq_inv")+1,sizeof(char));
- sprintf(filename,"%s.paired_uniq_inv",sevenway_root);
- if ((outputs[OUTPUT_PI] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_uniq_scr")+1,sizeof(char));
- sprintf(filename,"%s.paired_uniq_scr",sevenway_root);
- if ((outputs[OUTPUT_PS] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_uniq_long")+1,sizeof(char));
- sprintf(filename,"%s.paired_uniq_long",sevenway_root);
- if ((outputs[OUTPUT_PL] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_mult")+1,sizeof(char));
- sprintf(filename,"%s.paired_mult",sevenway_root);
- if ((outputs[OUTPUT_PM] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- filename = (char *) CALLOC(strlen(sevenway_root)+strlen(".paired_mult_xs")+1,sizeof(char));
- sprintf(filename,"%s.paired_mult_xs",sevenway_root);
- if ((outputs[OUTPUT_PX] = fopen(filename,write_mode)) == NULL) {
- fprintf(stderr,"Cannot open file %s for writing\n",filename);
- exit(9);
- }
- FREE(filename);
-
- return;
-}
-
-static void
-split_output_close ( ) {
- int i;
-
- for (i = 1; i <= N_SPLIT_OUTPUTS; i++) {
- fclose(outputs[i]);
- }
- return;
-}
-
-
#define BUFFERLEN 1024
int
main (int argc, char *argv[]) {
- FILE *fp_sam;
+ FILE **sam_inputs, *fp_sam;
+ int ninputs, filei;
int nchromosomes, i;
Univcoord_T *chroffsets;
Chrpos_T *chrlengths;
@@ -1431,7 +1292,8 @@ main (int argc, char *argv[]) {
char buffer[BUFFERLEN], *lastp, *p;
Intlist_T linelengths;
- int headerlen, linelen;
+ int *headerlengths, linelen;
+ Filestring_T headers = NULL;
#ifdef DEBUG14
Intlist_T linelengths_goldstd;
int linelen_goldstd;
@@ -1439,7 +1301,7 @@ main (int argc, char *argv[]) {
char *fileroot = NULL, *iitfile;
Univ_IIT_T chromosome_iit;
- int ncells, nmarked;
+ int *ncells, ncells_total, nmarked;
int opt;
extern int optind;
@@ -1460,7 +1322,7 @@ main (int argc, char *argv[]) {
exit(0);
} else if (!strcmp(long_name,"split-output")) {
- sevenway_root = optarg;
+ split_output_root = optarg;
} else if (!strcmp(long_name,"append-output")) {
appendp = true;
@@ -1554,128 +1416,116 @@ main (int argc, char *argv[]) {
FREE(chrlengths);
}
- if (sevenway_root != NULL) {
- split_output_open(sevenway_root,appendp);
+ /* Open all outputs, even if --split-output is not used */
+ outputs = (FILE **) CALLOC((1+N_SPLIT_OUTPUTS),sizeof(FILE *));
+
+
+ /* Inputs */
+ ninputs = argc;
+ sam_inputs = (FILE **) CALLOC(ninputs,sizeof(FILE *));
+ headerlengths = (int *) CALLOC(ninputs,sizeof(int));
+ ncells = (int *) CALLOC(ninputs,sizeof(int));
+ for (filei = 0; filei < ninputs; filei++) {
+ if ((sam_inputs[filei] = fopen(argv[filei],"r")) == NULL) {
+ fprintf(stderr,"Cannot open SAM file %s\n",argv[filei]);
+ exit(9);
+ }
}
-
- /* SAM file */
stopwatch = Stopwatch_new();
- if ((fp_sam = fopen(argv[0],"r")) == NULL) {
- fprintf(stderr,"Cannot open SAM file %s\n",argv[0]);
- exit(9);
- } else {
- Stopwatch_start(stopwatch);
- fprintf(stderr,"Analyzing SAM file...");
- headerlen = SAM_header_length(&lastchar,fp_sam);
- }
-
- /* Compute number of mappers and non-mappers */
-#ifdef DEBUG14
- if (!feof(fp_sam)) {
- if ((linelen = Samread_parse_linelen_fromfile(fp_sam)) > 0) {
- linelen += 1; /* Add 1 for char read by SAM_header_length */
- linelengths_goldstd = Intlist_push(NULL,linelen);
- }
- }
+ Stopwatch_start(stopwatch);
+ fprintf(stderr,"Analyzing %d SAM files...\n",ninputs);
- while (!feof(fp_sam)) {
- if ((linelen = Samread_parse_linelen_fromfile(fp_sam)) > 0) {
- linelengths_goldstd = Intlist_push(linelengths_goldstd,linelen);
- }
- }
- linelengths_goldstd = Intlist_reverse(linelengths_goldstd);
+ linelengths = (Intlist_T) NULL;
+ ncells_total = 0;
+ for (filei = 0; filei < ninputs; filei++) {
+ fp_sam = sam_inputs[filei];
+ fileposition = headerlengths[filei] = SAM_header_length(&lastchar,fp_sam); /* Ignore lastchar */
- moveto(fp_sam,headerlen+1); /* Simulate SAM_header_length */
-#endif
-
- /* Take care of char read by SAM_header_length */
+ /* Take care of char read by SAM_header_length */
#ifdef HAVE_FSEEKO
- fseeko(fp_sam,-1,SEEK_CUR);
+ fseeko(fp_sam,-1,SEEK_CUR);
#else
- fseek(fp_sam,-1,SEEK_CUR);
+ fseek(fp_sam,-1,SEEK_CUR);
#endif
- linelengths = (Intlist_T) NULL;
- ncells = 0;
- linelen = 0;
- fileposition = headerlen;
- while (fgets(buffer,BUFFERLEN,fp_sam) != NULL) {
- /* printf("Read %s\n",buffer); */
- lastp = buffer;
- while ((p = index(lastp,'\n')) != NULL) {
- linelen += (p - lastp)/sizeof(char) + 1;
-#ifdef DEBUG14
- linelengths_goldstd = Intlist_pop(linelengths_goldstd,&linelen_goldstd);
- if (linelen == linelen_goldstd) {
- /* fprintf(stderr,"Correct and observed linelen are %d\n",linelen); */
- } else {
- fprintf(stderr,"Correct linelen is %d. Observed is %d\n",linelen_goldstd,linelen);
- fprintf(stderr,"%s\n",buffer);
- exit(9);
- }
-#endif
- linelengths = Intlist_push(linelengths,linelen);
- fileposition += linelen;
- ncells++;
+ linelen = 0;
+ ncells[filei] = 0;
+ while (fgets(buffer,BUFFERLEN,fp_sam) != NULL) {
+ /* printf("Read %s\n",buffer); */
+ lastp = buffer;
+ while ((p = index(lastp,'\n')) != NULL) {
+ linelen += (p - lastp)/sizeof(char) + 1;
- linelen = 0;
- lastp = p + 1;
+ linelengths = Intlist_push(linelengths,linelen);
+ fileposition += linelen;
+ ncells[filei] += 1;
+
+ linelen = 0;
+ lastp = p + 1;
+ }
+ linelen += strlen(lastp);
+ /* printf("Adding %d to get linelen %d\n",strlen(buffer),linelen); */
}
- linelen += strlen(lastp);
- /* printf("Adding %d to get linelen %d\n",strlen(buffer),linelen); */
- }
-#ifdef DEBUG14
- if (linelengths_goldstd != NULL) {
- while (linelengths_goldstd != NULL) {
- linelengths_goldstd = Intlist_pop(linelengths_goldstd,&linelen_goldstd);
- fprintf(stderr,"Correct linelength %d is missing\n",linelen_goldstd);
+ ncells_total += ncells[filei];
+
+ if (fileposition != Access_filesize(argv[filei])) {
+ fprintf(stderr,"Something is wrong with parsing of SAM file %s\n",argv[filei]);
+ fprintf(stderr,"Final file position using sortinfo: %llu\n",(unsigned long long) fileposition);
+ fprintf(stderr,"File size of SAM output file: %llu\n",(unsigned long long) Access_filesize(argv[filei]));
+ exit(9);
+ } else {
+ fprintf(stderr," File %d has %d SAM lines.\n",filei+1,ncells[filei]);
}
- exit(9);
- }
-#endif
- fprintf(stderr,"done (%.1f seconds). Found %d SAM lines.\n",Stopwatch_stop(stopwatch),ncells);
- if (fileposition != Access_filesize(argv[0])) {
- fprintf(stderr,"Something is wrong with parsing of SAM file\n");
- fprintf(stderr,"Final file position using sortinfo: %llu\n",(unsigned long long) fileposition);
- fprintf(stderr,"File size of SAM output file: %llu\n",(unsigned long long) Access_filesize(argv[0]));
- exit(9);
}
- if (ncells == 0) {
+ fprintf(stderr,"Done with analysis (%.1f seconds). Found %d SAM lines total.\n",
+ Stopwatch_stop(stopwatch),ncells_total);
+
+ if (ncells_total == 0) {
/* Exit without printing header */
} else if (sam_headers_p == false) {
/* Don't print SAM headers */
- } else if (sevenway_root == NULL) {
- /* Print SAM headers to stdout */
- moveto(fp_sam,0);
- SAM_header_change_HD_tosorted_stdout(fp_sam,headerlen);
-
} else {
- /* Print SAM headers to each output */
- moveto(fp_sam,0);
- SAM_header_change_HD_tosorted_split(fp_sam,headerlen,outputs,N_SPLIT_OUTPUTS);
+ moveto(sam_inputs[0],0);
+ headers = SAM_header_change_HD_tosorted(sam_inputs[0],headerlengths[0]);
}
linelengths = Intlist_reverse(linelengths);
if (mark_duplicates_p == false) {
- process_without_dups(fp_sam,headerlen,linelengths,ncells,chromosome_iit,chroffsets);
+ process_without_dups(sam_inputs,headerlengths,ncells,ninputs,linelengths,ncells_total,
+ chromosome_iit,chroffsets,headers);
} else {
- nmarked = process_with_dups(fp_sam,headerlen,linelengths,ncells,chromosome_iit,chroffsets);
+ nmarked = process_with_dups(sam_inputs,headerlengths,ncells,ninputs,linelengths,ncells_total,
+ chromosome_iit,chroffsets,headers);
fprintf(stderr,"Marked %d out of %d SAM lines as duplicates (%.1f%%)\n",
- nmarked,ncells,100.0*(double) nmarked/(double) (ncells));
+ nmarked,ncells_total,100.0*(double) nmarked/(double) (ncells_total));
+ }
+
+ for (filei = 0; filei < ninputs; filei++) {
+ fclose(sam_inputs[filei]);
}
+ FREE(sam_inputs);
+ FREE(headerlengths);
+ FREE(ncells);
- fclose(fp_sam);
+ if (headers != NULL) {
+ Filestring_free(&headers);
+ }
- if (sevenway_root != NULL) {
- split_output_close();
+ /* SAM_header_touch(outputs,split_output_root,appendp); -- Don't want to destroy other SAM files */
+ for (i = 1; i <= N_SPLIT_OUTPUTS; i++) {
+ if (outputs[i] != NULL) {
+ fclose(outputs[i]);
+ }
}
+ FREE(outputs);
+
Intlist_free(&linelengths);
diff --git a/src/samflags.h b/src/samflags.h
index 101cae0..d7d626f 100644
--- a/src/samflags.h
+++ b/src/samflags.h
@@ -1,4 +1,4 @@
-/* $Id: samflags.h 154089 2014-11-25 21:03:16Z twu $ */
+/* $Id: samflags.h 155282 2014-12-12 19:42:54Z twu $ */
#ifndef SAMFLAGS_INCLUDED
#define SAMFLAGS_INCLUDED
@@ -27,53 +27,131 @@
/* XO tag for output type */
#define ABBREV_NOMAPPING_1 "NM"
#define ABBREV_NOMAPPING_2 "NM"
-#define ABBREV_HALFMAPPING_UNIQ "HU"
-#define ABBREV_HALFMAPPING_CIRCULAR "HC"
-#define ABBREV_HALFMAPPING_TRANSLOC "HT"
-#define ABBREV_HALFMAPPING_MULT "HM"
-#define ABBREV_HALFMAPPING_MULT_XS "HX"
+
#define ABBREV_UNPAIRED_UNIQ "UU"
-#define ABBREV_UNPAIRED_CIRCULAR "UC"
#define ABBREV_UNPAIRED_TRANSLOC "UT"
#define ABBREV_UNPAIRED_MULT "UM"
+
+#define ABBREV_UNPAIRED_CIRCULAR "UC"
#define ABBREV_UNPAIRED_MULT_XS "UX"
-#define ABBREV_PAIRED_UNIQ_CIRCULAR "PC"
+
+
+#define ABBREV_HALFMAPPING_UNIQ "HU"
+#define ABBREV_HALFMAPPING_TRANSLOC "HT"
+#define ABBREV_HALFMAPPING_MULT "HM"
+
#define ABBREV_PAIRED_UNIQ_INV "PI"
#define ABBREV_PAIRED_UNIQ_SCR "PS"
#define ABBREV_PAIRED_UNIQ_LONG "PL"
#define ABBREV_PAIRED_MULT "PM"
-#define ABBREV_PAIRED_MULT_XS "PX"
+
#define ABBREV_CONCORDANT_UNIQ "CU"
-#define ABBREV_CONCORDANT_CIRCULAR "CC"
#define ABBREV_CONCORDANT_TRANSLOC "CT"
#define ABBREV_CONCORDANT_MULT "CM"
+
+#define ABBREV_HALFMAPPING_CIRCULAR "HC"
+#define ABBREV_PAIRED_UNIQ_CIRCULAR "PC"
+#define ABBREV_CONCORDANT_CIRCULAR "CC"
+
+#define ABBREV_HALFMAPPING_MULT_XS "HX"
+#define ABBREV_PAIRED_MULT_XS "PX"
#define ABBREV_CONCORDANT_MULT_XS "CX"
+
typedef enum {OUTPUT_NONE,
+
OUTPUT_NM, /* nomapping */
- OUTPUT_HU, /* halfmapping_uniq */
- OUTPUT_HC, /* halfmapping_circular */
- OUTPUT_HT, /* halfmapping_transloc */
- OUTPUT_HM, /* halfmapping_mult */
- OUTPUT_HX, /* halfmapping_mult_xs */
+
OUTPUT_UU, /* unpaired_uniq */
- OUTPUT_UC, /* unpaired_circular */
OUTPUT_UT, /* unpaired_transloc */
OUTPUT_UM, /* unpaired_mult */
+
+ OUTPUT_UC, /* unpaired_circular */
OUTPUT_UX, /* unpaired_mult_xs */
- OUTPUT_PC, /* paired_uniq_circular */
+
+
+ OUTPUT_HU, /* halfmapping_uniq */
+ OUTPUT_HT, /* halfmapping_transloc */
+ OUTPUT_HM, /* halfmapping_mult */
+
OUTPUT_PI, /* paired_uniq_inv */
OUTPUT_PS, /* paired_uniq_scr */
OUTPUT_PL, /* paired_uniq_long */
OUTPUT_PM, /* paired_mult */
- OUTPUT_PX, /* paired_mult_xs */
OUTPUT_CU, /* concordant_uniq */
- OUTPUT_CC, /* concordant_circular */
OUTPUT_CT, /* concordant_transloc */
OUTPUT_CM, /* concordant_mult */
+
+ OUTPUT_HC, /* halfmapping_circular */
+ OUTPUT_PC, /* paired_uniq_circular */
+ OUTPUT_CC, /* concordant_circular */
+
+ OUTPUT_HX, /* halfmapping_mult_xs */
+ OUTPUT_PX, /* paired_mult_xs */
OUTPUT_CX} /* concordant_mult_xs */
SAM_split_output_type;
+/* output 0 is stdout */
+#define N_SPLIT_OUTPUTS_SINGLE_STD 4
+#define N_SPLIT_OUTPUTS_SINGLE_TOCIRC 5
+#define N_SPLIT_OUTPUTS_SINGLE 6
+
+#define N_SPLIT_OUTPUTS_STD 16
+#define N_SPLIT_OUTPUTS_TOCIRC 19
+#define N_SPLIT_OUTPUTS 22
+
+
+
+
+/* GSNAP outputs */
+#if 0
+ FILE *fp_nomapping; /* NM */
+
+ FILE *fp_unpaired_uniq; /* UU */
+ FILE *fp_unpaired_transloc; /* UT */
+ FILE *fp_unpaired_mult; /* UM */
+
+ FILE *fp_unpaired_circular; /* UC */
+ FILE *fp_unpaired_mult_xs_1; /* UX */
+ FILE *fp_unpaired_mult_xs_2; /* UX */
+
+ FILE *fp_halfmapping_uniq; /* HU */
+ FILE *fp_halfmapping_transloc; /* HT */
+ FILE *fp_halfmapping_mult; /* HM */
+
+ FILE *fp_paired_uniq_inv; /* PI */
+ FILE *fp_paired_uniq_scr; /* PS */
+ FILE *fp_paired_uniq_long; /* PL */
+ FILE *fp_paired_mult; /* PM */
+
+ FILE *fp_concordant_uniq; /* CU */
+ FILE *fp_concordant_transloc; /* CT */
+ FILE *fp_concordant_mult; /* CM */
+
+ FILE *fp_halfmapping_circular; /* HC */
+ FILE *fp_paired_uniq_circular; /* PC */
+ FILE *fp_concordant_circular; /* CC */
+
+ FILE *fp_halfmapping_mult_xs_1; /* HX */
+ FILE *fp_halfmapping_mult_xs_2; /* HX */
+ FILE *fp_paired_mult_xs_1; /* PX */
+ FILE *fp_paired_mult_xs_2; /* PX */
+ FILE *fp_concordant_mult_xs_1; /* CX */
+ FILE *fp_concordant_mult_xs_2; /* CX */
+#endif
+
+/* GMAP outputs */
+#if 0
+ FILE *fp_nomapping; /* NM */
+
+ FILE *fp_uniq; /* UU */
+ FILE *fp_transloc; /* UT */
+ FILE *fp_mult; /* UM */
+
+ FILE *fp_circular; /* UC */
+ FILE *fp_mult_xs; /* UX */
+#endif
+
#endif
diff --git a/src/samheader.c b/src/samheader.c
index 3e279ae..6575386 100644
--- a/src/samheader.c
+++ b/src/samheader.c
@@ -1,117 +1,195 @@
-static char rcsid[] = "$Id: samheader.c 155503 2014-12-16 22:22:55Z twu $";
+static char rcsid[] = "$Id: samheader.c 157094 2015-01-21 00:33:35Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "samheader.h"
+#include <stdlib.h>
+#include <string.h>
+#include "mem.h"
+
#define CHUNK 1024
-void
-SAM_header_change_HD_tosorted_stdout (FILE *fp, int headerlen) {
- char buffer[CHUNK], c, c0, c1, c2;
+#ifdef USE_MPI
+MPI_File
+#else
+FILE *
+#endif
+SAM_header_open_file (SAM_split_output_type split_output, char *split_output_root, bool appendp) {
+#ifdef USE_MPI
+ MPI_File output;
+#else
+ FILE *output;
+ char *write_mode;
+#endif
+ char *filename, *suffix;
+
+ if (split_output == OUTPUT_NONE) {
+
+#ifdef USE_MPI
+ /* output file name is passed in through split_output_root */
+ if (appendp == true) {
+ MPI_File_open(MPI_COMM_WORLD,split_output_root,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_APPEND,
+ MPI_INFO_NULL,&output);
+ } else {
+ /* Need to remove existing file, if any */
+ MPI_File_open(MPI_COMM_WORLD,split_output_root,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE,
+ MPI_INFO_NULL,&output);
+ MPI_File_close(&output);
+ MPI_File_open(MPI_COMM_WORLD,split_output_root,MPI_MODE_CREATE | MPI_MODE_WRONLY,
+ MPI_INFO_NULL,&output);
+ }
+ return output;
- /* @HD */
- while (headerlen > 0 && (c = fgetc(fp)) != '\t') {
- putchar(c);
- headerlen--;
- }
- if (headerlen > 0) {
- putchar('\t');
- headerlen--;
- }
+#else
+ if (appendp == true) {
+ write_mode = "a";
+ } else {
+ write_mode = "w";
+ }
- /* VN */
- while (headerlen > 0 && (c = fgetc(fp)) != '\t') {
- putchar(c);
- headerlen--;
- }
- if (headerlen > 0) {
- putchar('\t');
- headerlen--;
- }
+ filename = (char *) CALLOC(strlen(split_output_root)+1,sizeof(char));
+ sprintf(filename,"%s",split_output_root);
- if (headerlen > 3) {
- /* SO: */
- c0 = fgetc(fp);
- c1 = fgetc(fp);
- c2 = fgetc(fp);
- printf("%c%c%c",c0,c1,c2);
- headerlen -= 3;
+ if ((output = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ return (FILE *) NULL;
+ } else {
+ FREE(filename);
+ return output;
+ }
+#endif
- if (c0 == 'S' && c1 == 'O' && c2 == ':') {
- printf("coordinate\n");
- while (headerlen > 0 && fgetc(fp) != '\n') {
- /* Skip given SO value */
- headerlen--;
- }
- headerlen--;
+ } else {
+ switch (split_output) {
+ case OUTPUT_NONE: /* Handled above */ abort();
+
+ case OUTPUT_NM: suffix = "nomapping"; break;
+
+#ifdef GSNAP
+ case OUTPUT_UU: suffix = "unpaired_uniq"; break;
+ case OUTPUT_UT: suffix = "unpaired_transloc"; break;
+ case OUTPUT_UM: suffix = "unpaired_mult"; break;
+#else
+ case OUTPUT_UU: suffix = "uniq"; break;
+ case OUTPUT_UT: suffix = "transloc"; break;
+ case OUTPUT_UM: suffix = "mult"; break;
+#endif
+
+#ifdef GSNAP
+ case OUTPUT_UC: suffix = "unpaired_circular"; break;
+ case OUTPUT_UX: suffix = "unpaired_mult_xs"; break;
+#else
+ case OUTPUT_UC: suffix = "circular"; break;
+ case OUTPUT_UX: suffix = "mult_xs"; break;
+#endif
+
+ case OUTPUT_HU: suffix = "halfmapping_uniq"; break;
+ case OUTPUT_HT: suffix = "halfmapping_transloc"; break;
+ case OUTPUT_HM: suffix = "halfmapping_mult"; break;
+
+ case OUTPUT_PI: suffix = "paired_uniq_inv"; break;
+ case OUTPUT_PS: suffix = "paired_uniq_scr"; break;
+ case OUTPUT_PL: suffix = "paired_uniq_long"; break;
+ case OUTPUT_PM: suffix = "paired_mult"; break;
+
+ case OUTPUT_CU: suffix = "concordant_uniq"; break;
+ case OUTPUT_CT: suffix = "concordant_transloc"; break;
+ case OUTPUT_CM: suffix = "concordant_mult"; break;
+
+ case OUTPUT_HC: suffix = "halfmapping_circular"; break;
+ case OUTPUT_PC: suffix = "paired_uniq_circular"; break;
+ case OUTPUT_CC: suffix = "concordant_circular"; break;
+
+ case OUTPUT_HX: suffix = "halfmapping_mult_xs"; break;
+ case OUTPUT_PX: suffix = "paired_mult_xs"; break;
+ case OUTPUT_CX: suffix = "concordant_mult_xs"; break;
+
+ default:
+ fprintf(stderr,"Cannot handle split output type %d\n",split_output);
+ abort();
}
- }
- while (headerlen > CHUNK) {
- fread(buffer,sizeof(char),CHUNK,fp);
- fwrite(buffer,sizeof(char),CHUNK,stdout);
- headerlen -= CHUNK;
- }
- if (headerlen > 0) {
- fread(buffer,sizeof(char),headerlen,fp);
- fwrite(buffer,sizeof(char),headerlen,stdout);
- }
+ filename = (char *) CALLOC(strlen(split_output_root)+strlen(".")+strlen(suffix)+1,sizeof(char));
+ sprintf(filename,"%s.%s",split_output_root,suffix);
+
+#ifdef USE_MPI
+ if (appendp == true) {
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_APPEND,
+ MPI_INFO_NULL,&output);
+ } else {
+ /* Need to remove existing file, if any */
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE,
+ MPI_INFO_NULL,&output);
+ MPI_File_close(&output);
+ MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY,
+ MPI_INFO_NULL,&output);
+ }
+ FREE(filename);
+ return output;
- return;
+#else
+ if (appendp == true) {
+ write_mode = "a";
+ } else {
+ write_mode = "w";
+ }
+
+ if ((output = fopen(filename,write_mode)) == NULL) {
+ fprintf(stderr,"Cannot open file %s for writing\n",filename);
+ exit(9);
+ return (FILE *) NULL;
+ } else {
+ FREE(filename);
+ return output;
+ }
+#endif
+ }
}
-void
-SAM_header_change_HD_tosorted_split (FILE *fp, int headerlen, FILE **outputs, int noutputs) {
+/* Called only by sam_sort */
+Filestring_T
+SAM_header_change_HD_tosorted (FILE *input, int headerlen) {
+ Filestring_T fp;
char buffer[CHUNK], c, c0, c1, c2;
- int i;
+
+ fp = Filestring_new(/*id*/0);
/* @HD */
- while (headerlen > 0 && (c = fgetc(fp)) != '\t') {
- for (i = 1; i <= noutputs; i++) {
- putc(c,outputs[i]);
- }
+ while (headerlen > 0 && (c = fgetc(input)) != '\t') {
+ PUTC(c,fp);
headerlen--;
}
if (headerlen > 0) {
- for (i = 1; i <= noutputs; i++) {
- putc('\t',outputs[i]);
- }
+ PUTC('\t',fp);
headerlen--;
}
/* VN */
- while (headerlen > 0 && (c = fgetc(fp)) != '\t') {
- for (i = 1; i <= noutputs; i++) {
- putc(c,outputs[i]);
- }
+ while (headerlen > 0 && (c = fgetc(input)) != '\t') {
+ PUTC(c,fp);
headerlen--;
}
if (headerlen > 0) {
- for (i = 1; i <= noutputs; i++) {
- putc('\t',outputs[i]);
- }
+ PUTC('\t',fp);
headerlen--;
}
if (headerlen > 3) {
/* SO: */
- c0 = fgetc(fp);
- c1 = fgetc(fp);
- c2 = fgetc(fp);
- for (i = 1; i <= noutputs; i++) {
- fprintf(outputs[i],"%c%c%c",c0,c1,c2);
- }
+ c0 = fgetc(input);
+ c1 = fgetc(input);
+ c2 = fgetc(input);
+ FPRINTF(fp,"%c%c%c",c0,c1,c2);
headerlen -= 3;
if (c0 == 'S' && c1 == 'O' && c2 == ':') {
- for (i = 1; i <= noutputs; i++) {
- fprintf(outputs[i],"coordinate\n");
- }
- while (headerlen > 0 && fgetc(fp) != '\n') {
+ FPRINTF(fp,"coordinate\n");
+ while (headerlen > 0 && fgetc(input) != '\n') {
/* Skip given SO value */
headerlen--;
}
@@ -120,23 +198,38 @@ SAM_header_change_HD_tosorted_split (FILE *fp, int headerlen, FILE **outputs, in
}
while (headerlen > CHUNK) {
- fread(buffer,sizeof(char),CHUNK,fp);
- for (i = 1; i <= noutputs; i++) {
- fwrite(buffer,sizeof(char),CHUNK,outputs[i]);
- }
+ fread(buffer,sizeof(char),CHUNK,input);
+ Filestring_puts(fp,buffer,/*strlength*/CHUNK);
headerlen -= CHUNK;
}
if (headerlen > 0) {
- fread(buffer,sizeof(char),headerlen,fp);
- for (i = 1; i <= noutputs; i++) {
- fwrite(buffer,sizeof(char),headerlen,outputs[i]);
- }
+ fread(buffer,sizeof(char),headerlen,input);
+ Filestring_puts(fp,buffer,/*strlength*/headerlen);
}
- return;
+ return fp;
}
+#ifdef USE_MPI
+void
+SAM_header_print_HD (MPI_File fp, int nworkers, bool orderedp) {
+
+ MPI_File_write_shared(fp,"@HD",strlen("@HD"),MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,"\tVN:1.0",strlen("\tVN:1.0"),MPI_CHAR,MPI_STATUS_IGNORE);
+ if (nworkers > 1 && orderedp == false) {
+ MPI_File_write_shared(fp,"\tSO:unsorted",strlen("\tSO:unsorted"),MPI_CHAR,MPI_STATUS_IGNORE);
+ } else {
+ /* Picard does not recognize type unknown */
+ /* fprintf(fp,"\tSO:unknown"); */
+ MPI_File_write_shared(fp,"\tSO:unsorted",strlen("\tSO:unsorted"),MPI_CHAR,MPI_STATUS_IGNORE);
+ }
+ MPI_File_write_shared(fp,"\n",1,MPI_CHAR,MPI_STATUS_IGNORE);
+
+ return;
+}
+
+#else
void
SAM_header_print_HD (FILE *fp, int nworkers, bool orderedp) {
@@ -153,8 +246,41 @@ SAM_header_print_HD (FILE *fp, int nworkers, bool orderedp) {
return;
}
+#endif
+
+#ifdef USE_MPI
+void
+SAM_header_print_PG (MPI_File fp, int argc, char **argv, int optind) {
+ char **argstart;
+ int c;
+
+ MPI_File_write_shared(fp,"@PG",strlen("@PG"),MPI_CHAR,MPI_STATUS_IGNORE);
+#ifdef GSNAP
+ MPI_File_write_shared(fp,"\tID:GSNAP",strlen("\tID:GSNAP"),MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,"\tPN:gsnap",strlen("\tPN:gsnap"),MPI_CHAR,MPI_STATUS_IGNORE);
+#elif defined(PMAP)
+ MPI_File_write_shared(fp,"\tID:PMAP",strlen("\tID:PMAP"),MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,"\tPN:pmap",strlen("\tPN:pmap"),MPI_CHAR,MPI_STATUS_IGNORE);
+#else
+ MPI_File_write_shared(fp,"\tID:GMAP",strlen("\tID:GMAP"),MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,"\tPN:gmap",strlen("\tPN:gmap"),MPI_CHAR,MPI_STATUS_IGNORE);
+#endif
+ MPI_File_write_shared(fp,PACKAGE_VERSION,strlen(PACKAGE_VERSION),MPI_CHAR,MPI_STATUS_IGNORE);
+
+ MPI_File_write_shared(fp,"\tCL:",strlen("\tCL:"),MPI_CHAR,MPI_STATUS_IGNORE);
+ argstart = &(argv[-optind]);
+ MPI_File_write_shared(fp,argstart[0],strlen(argstart[0]),MPI_CHAR,MPI_STATUS_IGNORE);
+ for (c = 1; c < argc + optind; c++) {
+ MPI_File_write_shared(fp," ",1,MPI_CHAR,MPI_STATUS_IGNORE);
+ MPI_File_write_shared(fp,argstart[c],strlen(argstart[c]),MPI_CHAR,MPI_STATUS_IGNORE);
+ }
+ MPI_File_write_shared(fp,"\n",1,MPI_CHAR,MPI_STATUS_IGNORE);
+
+ return;
+}
+#else
void
SAM_header_print_PG (FILE *fp, int argc, char **argv, int optind) {
char **argstart;
@@ -181,28 +307,12 @@ SAM_header_print_PG (FILE *fp, int argc, char **argv, int optind) {
}
fprintf(fp,"\n");
-#if 0
- /* Algorithm types. Now using XG instead. */
- fprintf(fp,"@PG");
-#ifdef GSNAP
- fprintf(fp,"\tID:A");
- fprintf(fp,"\tPN:gsnap-suffix-array");
- fprintf(fp,"\n");
-
- fprintf(fp,"\tID:M");
- fprintf(fp,"\tPN:gsnap-gmap-method");
- fprintf(fp,"\n");
-
- fprintf(fp,"\tID:O");
- fprintf(fp,"\tPN:gsnap-overlap-merge");
- fprintf(fp,"\n");
-#endif
-
-#endif
-
return;
}
+#endif
+
+/* Called only by sam_sort */
int
SAM_header_length (int *lastchar, FILE *fp) {
int headerlen = 0;
diff --git a/src/samheader.h b/src/samheader.h
index b7cbd50..6a6c2cb 100644
--- a/src/samheader.h
+++ b/src/samheader.h
@@ -1,18 +1,42 @@
-/* $Id: samheader.h 154452 2014-12-02 19:28:04Z twu $ */
+/* $Id: samheader.h 157230 2015-01-22 18:49:34Z twu $ */
#ifndef SAMHEADER_INCLUDED
#define SAMHEADER_INCLUDED
+#ifdef USE_MPI
+#include <mpi.h>
+#endif
+
#include <stdio.h>
#include "bool.h"
+#include "samflags.h"
+#include "filestring.h"
+#ifdef USE_MPI
+extern MPI_File
+#else
+extern FILE *
+#endif
+SAM_header_open_file (SAM_split_output_type split_output, char *split_output_root, bool appendp);
+
+extern Filestring_T
+SAM_header_change_HD_tosorted (FILE *input, int headerlen);
+
+#ifdef USE_MPI
extern void
-SAM_header_change_HD_tosorted_stdout (FILE *fp, int headerlen);
-extern void
-SAM_header_change_HD_tosorted_split (FILE *fp, int headerlen, FILE **outputs, int noutputs);
+SAM_header_print_HD (MPI_File fp, int nworkers, bool orderedp);
+#else
extern void
SAM_header_print_HD (FILE *fp, int nworkers, bool orderedp);
+#endif
+
+#ifdef USE_MPI
+extern void
+SAM_header_print_PG (MPI_File fp, int argc, char **argv, int optind);
+#else
extern void
SAM_header_print_PG (FILE *fp, int argc, char **argv, int optind);
+#endif
+
extern int
SAM_header_length (int *lastchar, FILE *fp);
diff --git a/src/samprint.c b/src/samprint.c
index eda8d82..c78372e 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 161183 2015-03-18 17:04:33Z twu $";
+static char rcsid[] = "$Id: samprint.c 166973 2015-06-05 20:27:15Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -11,7 +11,6 @@ static char rcsid[] = "$Id: samprint.c 161183 2015-03-18 17:04:33Z twu $";
#include "mem.h"
#include "complement.h"
-#include "stage3hr.h"
#include "mapq.h"
#include "assert.h"
@@ -67,153 +66,37 @@ static char *failedinput_root;
static bool fastq_format_p;
static bool hide_soft_clips_p;
+static bool clip_overlap_p;
+static bool merge_overlap_p;
+
static bool sam_multiple_primaries_p;
static bool force_xs_direction_p;
static bool md_lowercase_variant_p;
static IIT_T snps_iit;
+static Univ_IIT_T chromosome_iit;
+static Genome_T genome;
+
void
SAM_setup (bool quiet_if_excessive_p_in, int maxpaths_report_in,
char *failedinput_root_in, bool fastq_format_p_in, bool hide_soft_clips_p_in,
- bool sam_multiple_primaries_p_in,
- bool force_xs_direction_p_in, bool md_lowercase_variant_p_in, IIT_T snps_iit_in) {
+ bool clip_overlap_p_in, bool merge_overlap_p_in, bool sam_multiple_primaries_p_in,
+ bool force_xs_direction_p_in, bool md_lowercase_variant_p_in, IIT_T snps_iit_in,
+ Univ_IIT_T chromosome_iit_in, Genome_T genome_in) {
quiet_if_excessive_p = quiet_if_excessive_p_in;
failedinput_root = failedinput_root_in;
fastq_format_p = fastq_format_p_in;
hide_soft_clips_p = hide_soft_clips_p_in;
+ clip_overlap_p = clip_overlap_p_in;
+ merge_overlap_p = merge_overlap_p_in;
maxpaths_report = maxpaths_report_in;
sam_multiple_primaries_p = sam_multiple_primaries_p_in;
force_xs_direction_p = force_xs_direction_p_in;
md_lowercase_variant_p = md_lowercase_variant_p_in;
snps_iit = snps_iit_in;
- return;
-}
-
-
-static FILE *fp_failedinput_1;
-static FILE *fp_failedinput_2;
-
-static FILE *fp_nomapping;
-static FILE *fp_unpaired_uniq;
-static FILE *fp_unpaired_circular;
-static FILE *fp_unpaired_transloc;
-static FILE *fp_unpaired_mult;
-static FILE *fp_unpaired_mult_xs_1;
-static FILE *fp_unpaired_mult_xs_2;
-static FILE *fp_halfmapping_uniq;
-static FILE *fp_halfmapping_circular;
-static FILE *fp_halfmapping_transloc;
-static FILE *fp_halfmapping_mult;
-static FILE *fp_halfmapping_mult_xs_1;
-static FILE *fp_halfmapping_mult_xs_2;
-static FILE *fp_paired_uniq_circular;
-static FILE *fp_paired_uniq_inv;
-static FILE *fp_paired_uniq_scr;
-static FILE *fp_paired_uniq_long;
-static FILE *fp_paired_mult;
-static FILE *fp_paired_mult_xs_1;
-static FILE *fp_paired_mult_xs_2;
-static FILE *fp_concordant_uniq;
-static FILE *fp_concordant_circular;
-static FILE *fp_concordant_transloc;
-static FILE *fp_concordant_mult;
-static FILE *fp_concordant_mult_xs_1;
-static FILE *fp_concordant_mult_xs_2;
-
-
-void
-SAM_file_setup_single (FILE *fp_failedinput_in, FILE *fp_nomapping_in,
- FILE *fp_unpaired_uniq_in, FILE *fp_unpaired_circular_in, FILE *fp_unpaired_transloc_in,
- FILE *fp_unpaired_mult_in, FILE *fp_unpaired_mult_xs_1_in) {
-
- fp_failedinput_1 = fp_failedinput_in;
-
- fp_nomapping = fp_nomapping_in;
- fp_unpaired_uniq = fp_unpaired_uniq_in;
- fp_unpaired_circular = fp_unpaired_circular_in;
- fp_unpaired_transloc = fp_unpaired_transloc_in;
- fp_unpaired_mult = fp_unpaired_mult_in;
- fp_unpaired_mult_xs_1 = fp_unpaired_mult_xs_1_in;
-
- return;
-}
-
-void
-SAM_file_setup_paired (FILE *fp_failedinput_1_in, FILE *fp_failedinput_2_in, FILE *fp_nomapping_in,
- FILE *fp_halfmapping_uniq_in, FILE *fp_halfmapping_circular_in, FILE *fp_halfmapping_transloc_in,
- FILE *fp_halfmapping_mult_in, FILE *fp_halfmapping_mult_xs_1_in, FILE *fp_halfmapping_mult_xs_2_in,
- FILE *fp_paired_uniq_circular_in, FILE *fp_paired_uniq_inv_in, FILE *fp_paired_uniq_scr_in,
- FILE *fp_paired_uniq_long_in, FILE *fp_paired_mult_in, FILE *fp_paired_mult_xs_1_in, FILE *fp_paired_mult_xs_2_in,
- FILE *fp_concordant_uniq_in, FILE *fp_concordant_circular_in, FILE *fp_concordant_transloc_in,
- FILE *fp_concordant_mult_in, FILE *fp_concordant_mult_xs_1_in, FILE *fp_concordant_mult_xs_2_in) {
-
- fp_failedinput_1 = fp_failedinput_1_in;
- fp_failedinput_2 = fp_failedinput_2_in;
-
- fp_nomapping = fp_nomapping_in;
- fp_halfmapping_uniq = fp_halfmapping_uniq_in;
- fp_halfmapping_circular = fp_halfmapping_circular_in;
- fp_halfmapping_transloc = fp_halfmapping_transloc_in;
- fp_halfmapping_mult = fp_halfmapping_mult_in;
- fp_halfmapping_mult_xs_1 = fp_halfmapping_mult_xs_1_in;
- fp_halfmapping_mult_xs_2 = fp_halfmapping_mult_xs_2_in;
- fp_paired_uniq_circular = fp_paired_uniq_circular_in;
- fp_paired_uniq_inv = fp_paired_uniq_inv_in;
- fp_paired_uniq_scr = fp_paired_uniq_scr_in;
- fp_paired_uniq_long = fp_paired_uniq_long_in;
- fp_paired_mult = fp_paired_mult_in;
- fp_paired_mult_xs_1 = fp_paired_mult_xs_1_in;
- fp_paired_mult_xs_2 = fp_paired_mult_xs_2_in;
- fp_concordant_uniq = fp_concordant_uniq_in;
- fp_concordant_circular = fp_concordant_circular_in;
- fp_concordant_transloc = fp_concordant_transloc_in;
- fp_concordant_mult = fp_concordant_mult_in;
- fp_concordant_mult_xs_1 = fp_concordant_mult_xs_1_in;
- fp_concordant_mult_xs_2 = fp_concordant_mult_xs_2_in;
- return;
-}
-
-void
-SAM_file_setup_all (FILE *fp_failedinput_1_in, FILE *fp_failedinput_2_in, FILE *fp_nomapping_in,
- FILE *fp_unpaired_uniq_in, FILE *fp_unpaired_circular_in, FILE *fp_unpaired_transloc_in,
- FILE *fp_unpaired_mult_in, FILE *fp_unpaired_mult_xs_1_in, FILE *fp_unpaired_mult_xs_2_in,
- FILE *fp_halfmapping_uniq_in, FILE *fp_halfmapping_circular_in, FILE *fp_halfmapping_transloc_in,
- FILE *fp_halfmapping_mult_in, FILE *fp_halfmapping_mult_xs_1_in, FILE *fp_halfmapping_mult_xs_2_in,
- FILE *fp_paired_uniq_circular_in, FILE *fp_paired_uniq_inv_in, FILE *fp_paired_uniq_scr_in,
- FILE *fp_paired_uniq_long_in, FILE *fp_paired_mult_in, FILE *fp_paired_mult_xs_1_in, FILE *fp_paired_mult_xs_2_in,
- FILE *fp_concordant_uniq_in, FILE *fp_concordant_circular_in, FILE *fp_concordant_transloc_in,
- FILE *fp_concordant_mult_in, FILE *fp_concordant_mult_xs_1_in, FILE *fp_concordant_mult_xs_2_in) {
-
- fp_failedinput_1 = fp_failedinput_1_in;
- fp_failedinput_2 = fp_failedinput_2_in;
-
- fp_nomapping = fp_nomapping_in;
- fp_unpaired_uniq = fp_unpaired_uniq_in;
- fp_unpaired_circular = fp_unpaired_circular_in;
- fp_unpaired_transloc = fp_unpaired_transloc_in;
- fp_unpaired_mult = fp_unpaired_mult_in;
- fp_unpaired_mult_xs_1 = fp_unpaired_mult_xs_1_in;
- fp_unpaired_mult_xs_2 = fp_unpaired_mult_xs_2_in;
- fp_halfmapping_uniq = fp_halfmapping_uniq_in;
- fp_halfmapping_circular = fp_halfmapping_circular_in;
- fp_halfmapping_transloc = fp_halfmapping_transloc_in;
- fp_halfmapping_mult = fp_halfmapping_mult_in;
- fp_halfmapping_mult_xs_1 = fp_halfmapping_mult_xs_1_in;
- fp_halfmapping_mult_xs_2 = fp_halfmapping_mult_xs_2_in;
- fp_paired_uniq_circular = fp_paired_uniq_circular_in;
- fp_paired_uniq_inv = fp_paired_uniq_inv_in;
- fp_paired_uniq_scr = fp_paired_uniq_scr_in;
- fp_paired_uniq_long = fp_paired_uniq_long_in;
- fp_paired_mult = fp_paired_mult_in;
- fp_paired_mult_xs_1 = fp_paired_mult_xs_1_in;
- fp_paired_mult_xs_2 = fp_paired_mult_xs_2_in;
- fp_concordant_uniq = fp_concordant_uniq_in;
- fp_concordant_circular = fp_concordant_circular_in;
- fp_concordant_transloc = fp_concordant_transloc_in;
- fp_concordant_mult = fp_concordant_mult_in;
- fp_concordant_mult_xs_1 = fp_concordant_mult_xs_1_in;
- fp_concordant_mult_xs_2 = fp_concordant_mult_xs_2_in;
+ chromosome_iit = chromosome_iit_in;
+ genome = genome_in;
return;
}
@@ -318,457 +201,47 @@ SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
}
-#if 0
-/* Replaced by new adjust_hardclips procedure in stage3hr.c */
-
-/* Shifts low_querypos and high_querypos upward until a matching
- nucleotide is found from both hits. If not found, the shifts
- low_querypos and high_querypos downward until a matching nucleotide
- is found. */
-
-static void
-adjust_hardclips (int *hardclip_low, Stage3end_T hit_low, int low_querylength,
- int *hardclip_high, Stage3end_T hit_high, int high_querylength) {
- int orig_hardclip_low, orig_hardclip_high;
- Substring_T low_substring, high_substring;
- struct Pair_T *low_pairarray, *high_pairarray;
- int low_querystart, low_queryend, low_npairs, high_npairs;
- int low_querypos, high_querypos;
- bool plusp;
-
- debug3(printf("Entering adjust_hardclips with hardclip_low %d, hardclip_high %d\n",
- *hardclip_low,*hardclip_high));
- orig_hardclip_low = *hardclip_low;
- orig_hardclip_high = *hardclip_high;
-
- plusp = Stage3end_plusp(hit_low);
-
- if (Stage3end_hittype(hit_low) == GMAP && Stage3end_hittype(hit_high) == GMAP) {
- debug3(printf("Dual GMAP\n"));
- low_pairarray = Stage3end_pairarray(hit_low);
- low_npairs = Stage3end_npairs(hit_low);
- high_pairarray = Stage3end_pairarray(hit_high);
- high_npairs = Stage3end_npairs(hit_high);
-
- if (plusp == true) {
- if (hide_soft_clips_p == true) {
- low_querystart = 0;
- } else {
- low_querystart = Stage3end_gmap_querystart(hit_low);
- }
- if (*hardclip_low > low_querystart) {
- low_querypos = *hardclip_low;
- high_querypos = high_querylength - 1 - (*hardclip_high);
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- }
- if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
- debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- while (low_querypos > 0 && high_querypos > 0 &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- }
- if (low_querypos <= 0 || high_querypos <= 0) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
- }
- }
-
- } else {
- if (hide_soft_clips_p == true) {
- low_queryend = low_querylength - 1;
- } else {
- low_queryend = Stage3end_gmap_queryend(hit_low);
- }
- if (low_querylength - *hardclip_low < low_queryend) {
- low_querypos = low_querylength - 1 - (*hardclip_low);
- high_querypos = *hardclip_high;
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos++;
- high_querypos++;
- }
- if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
- debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- while (low_querypos > 0 && high_querypos > 0 &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
- Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- }
- if (low_querypos <= 0 || high_querypos <= 0) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
- }
- }
- }
-
- } else if (Stage3end_hittype(hit_low) == GMAP) {
- debug3(printf("Low GMAP\n"));
- low_pairarray = Stage3end_pairarray(hit_low);
- low_npairs = Stage3end_npairs(hit_low);
-
- if (plusp == true) {
- if (hide_soft_clips_p == true) {
- low_querystart = 0;
- } else {
- low_querystart = Stage3end_gmap_querystart(hit_low);
- }
- if (*hardclip_low > low_querystart) {
- low_querypos = *hardclip_low;
- high_querypos = high_querylength - 1 - (*hardclip_high);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false || high_substring == NULL)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- }
- if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
- debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- while (low_querypos > 0 && high_querypos > 0 &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false || high_substring == NULL)) {
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- }
- if (low_querypos <= 0 || high_querypos <= 0) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
- }
- }
-
- } else {
- if (hide_soft_clips_p == true) {
- low_queryend = low_querylength - 1;
- } else {
- low_queryend = Stage3end_gmap_queryend(hit_low);
- }
- if (low_querylength - *hardclip_low < low_queryend) {
- low_querypos = low_querylength - 1 - (*hardclip_low);
- high_querypos = *hardclip_high;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false || high_substring == NULL)) {
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos++;
- high_querypos++;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- }
- if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
- debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- while (low_querypos > 0 && high_querypos > 0 &&
- (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false || high_substring == NULL)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- }
- if (low_querypos <= 0 || high_querypos <= 0) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
- }
- }
- }
-
- } else if (Stage3end_hittype(hit_high) == GMAP) {
- debug3(printf("High GMAP\n"));
- high_pairarray = Stage3end_pairarray(hit_high);
- high_npairs = Stage3end_npairs(hit_high);
-
- if (plusp == true) {
- if (hide_soft_clips_p == true) {
- low_querystart = Substring_querystart_orig(Stage3end_substring_low(hit_low));
- } else {
- low_querystart = Substring_querystart(Stage3end_substring_low(hit_low));
- }
- if (*hardclip_low > low_querystart) {
- low_querypos = *hardclip_low;
- high_querypos = high_querylength - 1 - (*hardclip_high);
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- }
- if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
- debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- while (low_querypos > 0 && high_querypos > 0 &&
- (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- }
- if (low_querypos <= 0 || high_querypos <= 0) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
- }
- }
-
- } else {
- if (hide_soft_clips_p == true) {
- low_queryend = Substring_queryend_orig(Stage3end_substring_low(hit_low));
- } else {
- low_queryend = Substring_queryend(Stage3end_substring_low(hit_low));
- }
- if (low_querylength - *hardclip_low < low_queryend) {
- low_querypos = low_querylength - 1 - (*hardclip_low);
- high_querypos = *hardclip_high;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- }
- if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
- debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- while (low_querypos > 0 && high_querypos > 0 &&
- (low_substring == NULL || Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- }
- if (low_querypos <= 0 || high_querypos <= 0) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
- }
- }
- }
-
- } else {
- if (plusp == true) {
- debug3(printf("Both substrings, plus\n"));
-
- if (hide_soft_clips_p == true) {
- low_querystart = Substring_querystart_orig(Stage3end_substring_low(hit_low));
- } else {
- low_querystart = Substring_querystart(Stage3end_substring_low(hit_low));
- }
-
- if (*hardclip_low > low_querystart) {
- low_querypos = *hardclip_low;
- high_querypos = high_querylength - 1 - *hardclip_high;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (low_substring == NULL || high_substring == NULL)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- }
- if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
- debug3(printf("Querypos increase failed. Tryiing querypos decrease.\n"));
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- while (low_querypos > 0 && high_querypos > 0 &&
- (low_substring == NULL || high_substring == NULL)) {
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- }
- if (low_querypos <= 0 || high_querypos <= 0) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
- }
- }
-
- } else {
- debug3(printf("Both substrings, minus\n"));
-
- if (hide_soft_clips_p == true) {
- low_queryend = Substring_queryend_orig(Stage3end_substring_low(hit_low));
- } else {
- low_queryend = Substring_queryend(Stage3end_substring_low(hit_low));
- }
-
- if (low_querylength - *hardclip_low < low_queryend) {
- low_querypos = low_querylength - 1 - (*hardclip_low);
- high_querypos = *hardclip_high;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- while (low_querypos < low_querylength && high_querypos < high_querylength &&
- (low_substring == NULL || high_substring == NULL)) {
- (*hardclip_low)--;
- (*hardclip_high)++;
- low_querypos++;
- high_querypos++;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- }
- if (low_querypos >= low_querylength || high_querypos >= high_querylength) {
- debug3(printf("Querypos increase failed. Trying querypos decrease.\n"));
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- while (low_querypos > 0 && high_querypos > 0 &&
- (low_substring == NULL || high_substring == NULL)) {
- (*hardclip_low)++;
- (*hardclip_high)--;
- low_querypos--;
- high_querypos--;
- low_substring = Stage3end_substring_containing(hit_low,low_querypos);
- high_substring = Stage3end_substring_containing(hit_high,high_querypos);
- }
- if (low_querypos <= 0 || high_querypos <= 0) {
- *hardclip_low = orig_hardclip_low;
- *hardclip_high = orig_hardclip_high;
- }
- }
- }
- }
- }
-
- debug3(printf("Exiting adjust_hardclips with hardclip_low %d, hardclip_high %d\n",
- *hardclip_low,*hardclip_high));
-
- return;
-}
-#endif
-
-
-
Chrpos_T
-SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, int querylength) {
- Substring_T substring_low;
- Chrpos_T chrpos;
- int querystart, queryend;
- int trim_low;
+SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, int querylength,
+ bool first_read_p) {
+ Substring_T substring;
+ Hittype_T hittype;
if (this == NULL) {
return 0U;
- } else if (Stage3end_hittype(this) == GMAP) {
- chrpos = Pair_genomicpos_low(hardclip_low,hardclip_high,Stage3end_pairarray(this),Stage3end_npairs(this),
- querylength,/*watsonp*/Stage3end_plusp(this),hide_soft_clips_p);
-
- } else if (hide_soft_clips_p == true) {
- substring_low = Stage3end_substring_low(this,hardclip_low);
- if (Stage3end_plusp(this) == true) {
- /* Add 1 to report in 1-based coordinates */
- chrpos = Substring_alignstart_chr(substring_low) + 1U;
- querystart = Substring_querystart_orig(substring_low);
- chrpos += hardclip_low - querystart;
-
- } else {
- /* Add 1 to report in 1-based coordinates */
- chrpos = Substring_alignend_chr(substring_low) + 1U;
- queryend = Substring_queryend_orig(substring_low);
- chrpos += hardclip_low - (querylength - queryend);
- }
+ } else if ((hittype = Stage3end_hittype(this)) == GMAP) {
+ return Pair_genomicpos_low(hardclip_low,hardclip_high,Stage3end_pairarray(this),Stage3end_npairs(this),
+ querylength,/*watsonp*/Stage3end_plusp(this),hide_soft_clips_p);
- } else {
+ } else if (hittype == SAMECHR_SPLICE || hittype == TRANSLOC_SPLICE) {
+ /* Want concordant substring */
if (Stage3end_plusp(this) == true) {
- if ((trim_low = Stage3end_trim_left_raw(this)) < hardclip_low) {
- trim_low = hardclip_low;
+ if (first_read_p == true) {
+ /* Eventually want substringN */
+ substring = Stage3end_substring2(this);
+ } else {
+ substring = Stage3end_substring1(this);
}
- substring_low = Stage3end_substring_low(this,trim_low);
- debug4(printf("Plus: Substring containing trim_low %d is %d..%d => %u..%u\n",
- trim_low,Substring_querystart(substring_low),Substring_queryend(substring_low),
- Substring_alignstart_chr(substring_low),Substring_alignend_chr(substring_low)));
-
- /* Add 1 to report in 1-based coordinates */
- chrpos = Substring_alignstart_chr(substring_low) + 1U;
- querystart = Substring_querystart_orig(substring_low);
-
- debug4(printf("Incrementing chrpos by %d = %d - %d\n",trim_low - querystart,trim_low,querystart));
- chrpos += trim_low - querystart;
-
} else {
- if ((trim_low = Stage3end_trim_right_raw(this)) < hardclip_low) {
- trim_low = hardclip_low;
+ if (first_read_p == true) {
+ substring = Stage3end_substring1(this);
+ } else {
+ /* Eventually want substringN */
+ substring = Stage3end_substring2(this);
}
-
- substring_low = Stage3end_substring_low(this,trim_low);
- debug4(printf("Minus: Substring containing trim_low %d is %d..%d => %u..%u\n",
- trim_low,Substring_querystart(substring_low),Substring_queryend(substring_low),
- Substring_alignstart_chr(substring_low),Substring_alignend_chr(substring_low)));
-
- /* Add 1 to report in 1-based coordinates */
- chrpos = Substring_alignend_chr(substring_low) + 1U;
- queryend = Substring_queryend_orig(substring_low);
- debug4(printf("queryend is %d, chrpos is %u\n",queryend,chrpos));
-
- debug4(printf("Incrementing chrpos by %d = %d - (querylength %d - %d)\n",
- trim_low - (querylength - queryend),trim_low,querylength,queryend));
- chrpos += trim_low - (querylength - queryend);
}
+ return Substring_compute_chrpos(substring,hardclip_low,hide_soft_clips_p);
+
+ } else {
+ /* Want low substring */
+ substring = Stage3end_substring_low(this,hardclip_low);
+ return Substring_compute_chrpos(substring,hardclip_low,hide_soft_clips_p);
}
-
- return chrpos;
}
static void
-print_chromosomal_pos (FILE *fp, Chrnum_T chrnum, Chrpos_T chrpos, Chrpos_T chrlength,
+print_chromosomal_pos (Filestring_T fp, Chrnum_T chrnum, Chrpos_T chrpos, Chrpos_T chrlength,
Univ_IIT_T chromosome_iit) {
bool allocp;
char *chr;
@@ -776,7 +249,7 @@ print_chromosomal_pos (FILE *fp, Chrnum_T chrnum, Chrpos_T chrpos, Chrpos_T chrl
#if 0
if (chrpos == 0U) {
/* No mapping */
- fprintf(fp,"\t*\t0");
+ FPRINTF(fp,"\t*\t0");
return;
}
#endif
@@ -788,70 +261,63 @@ print_chromosomal_pos (FILE *fp, Chrnum_T chrnum, Chrpos_T chrpos, Chrpos_T chrl
} else {
chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
- fprintf(fp,"\t%s",chr);
- if (allocp == true) {
- FREE(chr);
- }
/* chrpos already in 1-based coordinates */
if (chrpos > chrlength) {
- fprintf(fp,"\t%u",chrpos - chrlength /*+1U*/);
+ FPRINTF(fp,"\t%s\t%u",chr,chrpos - chrlength /*+1U*/);
} else {
- fprintf(fp,"\t%u",chrpos /*+1U*/);
+ FPRINTF(fp,"\t%s\t%u",chr,chrpos /*+1U*/);
+ }
+
+ if (allocp == true) {
+ FREE(chr);
}
+
return;
}
}
static void
-print_mate_chromosomal_pos (FILE *fp, Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
+print_mate_chromosomal_pos (Filestring_T fp, Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
Chrpos_T mate_chrpos, Chrpos_T mate_chrlength, Chrnum_T anchor_chrnum, Chrpos_T anchor_chrpos,
Univ_IIT_T chromosome_iit) {
bool allocp;
char *chr;
if (mate_chrpos == 0U) {
- /* No mapping */
- fprintf(fp,"\t*\t0");
- return;
-
- } else if (mate_chrnum == 0) {
- /* Interchromosomal splice. Choose effective chrnum. */
- if (anchor_chrpos > 0U && anchor_chrnum > 0 && mate_effective_chrnum == anchor_chrnum) {
- fprintf(fp,"\t=");
- } else {
- chr = Univ_IIT_label(chromosome_iit,mate_effective_chrnum,&allocp);
- fprintf(fp,"\t%s",chr);
- if (allocp == true) {
- FREE(chr);
- }
- }
-
- /* chrpos already in 1-based coordinates */
- if (mate_chrpos > mate_chrlength) {
- fprintf(fp,"\t%u",mate_chrpos - mate_chrlength /*+1U*/);
- } else {
- fprintf(fp,"\t%u",mate_chrpos /*+1U*/);
- }
+ FPRINTF(fp,"\t*\t0");
return;
} else {
+ if (mate_chrnum == 0) {
+ /* Interchromosomal splice. Choose effective chrnum. */
+ mate_chrnum = mate_effective_chrnum;
+ }
+
if (anchor_chrpos > 0U && anchor_chrnum > 0 && mate_chrnum == anchor_chrnum) {
- fprintf(fp,"\t=");
+ /* chrpos already in 1-based coordinates */
+ if (mate_chrpos > mate_chrlength) {
+ FPRINTF(fp,"\t=\t%u",mate_chrpos - mate_chrlength /*+1U*/);
+ } else {
+ FPRINTF(fp,"\t=\t%u",mate_chrpos /*+1U*/);
+ }
+
} else {
chr = Univ_IIT_label(chromosome_iit,mate_chrnum,&allocp);
- fprintf(fp,"\t%s",chr);
+
+ /* chrpos already in 1-based coordinates */
+ if (mate_chrpos > mate_chrlength) {
+ FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos - mate_chrlength /*+1U*/);
+ } else {
+ FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos /*+1U*/);
+ }
+
if (allocp == true) {
FREE(chr);
}
}
/* chrpos already in 1-based coordinates */
- if (mate_chrpos > mate_chrlength) {
- fprintf(fp,"\t%u",mate_chrpos - mate_chrlength /*+1U*/);
- } else {
- fprintf(fp,"\t%u",mate_chrpos /*+1U*/);
- }
return;
}
}
@@ -878,7 +344,7 @@ make_complement_buffered (char *complement, char *sequence, unsigned int length)
/* npaths could be non-zero, if user selected --quiet-if-excessive */
void
-SAM_print_nomapping (FILE *fp, char *abbrev, Shortread_T queryseq, Stage3end_T mate, char *acc1, char *acc2,
+SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3end_T mate, char *acc1, char *acc2,
Univ_IIT_T chromosome_iit, Resulttype_T resulttype, bool first_read_p,
int npaths, int npaths_mate, Chrpos_T mate_chrpos, int quality_shift,
char *sam_read_group_id, bool invertp, bool invert_mate_p) {
@@ -887,29 +353,20 @@ SAM_print_nomapping (FILE *fp, char *abbrev, Shortread_T queryseq, Stage3end_T m
/* 1. QNAME */
if (acc2 == NULL) {
- fprintf(fp,"%s",acc1);
+ FPRINTF(fp,"%s",acc1);
} else {
- fprintf(fp,"%s,%s",acc1,acc2);
+ FPRINTF(fp,"%s,%s",acc1,acc2);
}
/* 2. FLAG */
- flag = SAM_compute_flag(/*plusp (NA)*/true,mate,resulttype,first_read_p,
- /*pathnum*/0,/*npaths*/0,npaths_mate,
- /*absmq_score*/0,/*first_absmq*/0,invertp,invert_mate_p);
- fprintf(fp,"\t%u",flag);
-
/* 3. RNAME: chr */
- fprintf(fp,"\t*");
-
/* 4. POS: chrpos */
- fprintf(fp,"\t0");
-
- /* 5. MAPQ: Mapping quality */
- /* Picard says MAPQ should be 0 for an unmapped read */
- fprintf(fp,"\t0");
-
+ /* 5. MAPQ: Mapping quality. Picard says MAPQ should be 0 for an unmapped read */
/* 6. CIGAR */
- fprintf(fp,"\t*");
+ flag = SAM_compute_flag(/*plusp (NA)*/true,mate,resulttype,first_read_p,
+ /*pathnum*/0,/*npaths*/0,npaths_mate,
+ /*absmq_score*/0,/*first_absmq*/0,invertp,invert_mate_p);
+ FPRINTF(fp,"\t%u\t*\t0\t0\t*",flag);
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
@@ -919,31 +376,30 @@ SAM_print_nomapping (FILE *fp, char *abbrev, Shortread_T queryseq, Stage3end_T m
/* 9. ISIZE: Insert size */
- fprintf(fp,"\t0");
+ FPRINTF(fp,"\t0");
/* 10. SEQ: queryseq and 11. QUAL: quality scores */
/* Since there is no mapping, we print the original query sequence. */
- fprintf(fp,"\t");
if (invertp == false) {
- Shortread_print_chopped(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0);
- fprintf(fp,"\t");
+ Shortread_print_chopped_sam(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0);
+ FPRINTF(fp,"\t");
Shortread_print_quality(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0,
quality_shift,/*show_chopped_p*/false);
} else {
- Shortread_print_chopped_revcomp(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0);
- fprintf(fp,"\t");
+ Shortread_print_chopped_revcomp_sam(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0);
+ FPRINTF(fp,"\t");
Shortread_print_quality_revcomp(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0,
quality_shift,/*show_chopped_p*/false);
}
/* 12. TAGS: RG */
if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
+ FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
}
/* 12. TAGS: NH */
if (npaths > 0) {
- fprintf(fp,"\tNH:i:%d",npaths);
+ FPRINTF(fp,"\tNH:i:%d",npaths);
}
/* 12. TAGS: XB */
@@ -953,9 +409,9 @@ SAM_print_nomapping (FILE *fp, char *abbrev, Shortread_T queryseq, Stage3end_T m
Shortread_print_chop(fp,queryseq,invertp);
/* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
+ FPRINTF(fp,"\tXO:Z:%s",abbrev);
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
@@ -963,13 +419,13 @@ SAM_print_nomapping (FILE *fp, char *abbrev, Shortread_T queryseq, Stage3end_T m
/* Derived from print_tokens_gff3 */
static void
-print_tokens_sam (FILE *fp, List_T tokens) {
+print_tokens_sam (Filestring_T fp, List_T tokens) {
List_T p;
char *token;
for (p = tokens; p != NULL; p = List_next(p)) {
token = (char *) List_head(p);
- fprintf(fp,"%s",token);
+ FPRINTF(fp,"%s",token);
FREE(token);
}
@@ -1341,8 +797,8 @@ compute_cigar_types_only (Intlist_T types, char type, int stringlength, int quer
static void
-print_cigar (FILE *fp, char type, int stringlength, int querypos, int querylength,
- int hardclip_low, int hardclip_high, bool plusp, int lastp) {
+print_cigar (Filestring_T fp, char type, int stringlength, int querypos, int querylength,
+ int hardclip_low, int hardclip_high, bool plusp, bool lastp, int trimlength) {
int matchlength = 0;
int startpos, endpos;
int cliplength = 0;
@@ -1370,12 +826,20 @@ print_cigar (FILE *fp, char type, int stringlength, int querypos, int querylengt
if (endpos >= startpos) {
if (cliplength > 0) {
debug1(printf(" Pushing initial %dH\n",cliplength));
- fprintf(fp,"%dH",cliplength);
+ FPRINTF(fp,"%dH",cliplength);
}
matchlength = endpos - startpos;
- if (matchlength > 0) {
+ if (matchlength <= 0) {
+ /* Skip */
+ } else if (type != 'E') {
debug1(printf(" Pushing %d%c\n",matchlength,type));
- fprintf(fp,"%d%c",matchlength,type);
+ FPRINTF(fp,"%d%c",matchlength,type);
+ } else if (matchlength == trimlength) {
+ debug1(printf(" Pushing %dS\n",matchlength));
+ FPRINTF(fp,"%dS",matchlength);
+ } else {
+ debug1(printf(" Pushing %dH\n",matchlength));
+ FPRINTF(fp,"%dH",matchlength);
}
}
@@ -1385,7 +849,7 @@ print_cigar (FILE *fp, char type, int stringlength, int querypos, int querylengt
cliplength = querylength - endpos;
if (cliplength > 0) {
debug1(printf(" Pushing final %dH\n",cliplength));
- fprintf(fp,"%dH",cliplength);
+ FPRINTF(fp,"%dH",cliplength);
}
}
@@ -1413,12 +877,20 @@ print_cigar (FILE *fp, char type, int stringlength, int querypos, int querylengt
if (endpos <= startpos) {
if (cliplength > 0) {
debug1(printf(" Pushing initial %dH\n",cliplength));
- fprintf(fp,"%dH",cliplength);
+ FPRINTF(fp,"%dH",cliplength);
}
matchlength = startpos - endpos;
- if (matchlength > 0) {
+ if (matchlength <= 0) {
+ /* Skip */
+ } else if (type != 'E') {
debug1(printf(" Pushing %d%c\n",matchlength,type));
- fprintf(fp,"%d%c",matchlength,type);
+ FPRINTF(fp,"%d%c",matchlength,type);
+ } else if (matchlength == trimlength) {
+ debug1(printf(" Pushing %dS\n",matchlength));
+ FPRINTF(fp,"%dS",matchlength);
+ } else {
+ debug1(printf(" Pushing %dH\n",matchlength));
+ FPRINTF(fp,"%dH",matchlength);
}
}
@@ -1427,7 +899,7 @@ print_cigar (FILE *fp, char type, int stringlength, int querypos, int querylengt
cliplength = endpos;
if (cliplength > 0) {
debug1(printf(" Pushing final %dH\n",cliplength));
- fprintf(fp,"%dH",cliplength);
+ FPRINTF(fp,"%dH",cliplength);
}
}
}
@@ -1438,7 +910,7 @@ print_cigar (FILE *fp, char type, int stringlength, int querypos, int querylengt
static int
print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdiff,
- FILE *fp, int matchlength, char *genomicfwd_refdiff, char *genomicfwd_bothdiff,
+ Filestring_T fp, int matchlength, char *genomicfwd_refdiff, char *genomicfwd_bothdiff,
int stringlength, int querypos, int querylength,
int hardclip_low, int hardclip_high, bool plusp, bool lastp) {
int starti, endi, i;
@@ -1487,11 +959,11 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
} else {
/* A true mismatch against both variants */
if (matchlength > 0 || hardclip_end_p == true) {
- fprintf(fp,"%d",matchlength);
+ FPRINTF(fp,"%d",matchlength);
*printp = true;
hardclip_end_p = false;
}
- fprintf(fp,"%c",toupper(genomicfwd_refdiff[i]));
+ FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
*printp = true;
local_nmismatches += 1;
matchlength = 0;
@@ -1507,11 +979,11 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
} else if (isupper(genomicfwd_bothdiff[i])) {
/* A mismatch against the reference only => alternate variant */
if (matchlength > 0 || hardclip_end_p == true) {
- fprintf(fp,"%d",matchlength);
+ FPRINTF(fp,"%d",matchlength);
*printp = true;
hardclip_end_p = false;
}
- fprintf(fp,"%c",genomicfwd_refdiff[i]); /* Leave as lower case */
+ FPRINTF(fp,"%c",genomicfwd_refdiff[i]); /* Leave as lower case */
*printp = true;
local_nmismatches += 1;
matchlength = 0;
@@ -1519,11 +991,11 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
} else {
/* A true mismatch against both variants */
if (matchlength > 0 || hardclip_end_p == true) {
- fprintf(fp,"%d",matchlength);
+ FPRINTF(fp,"%d",matchlength);
*printp = true;
hardclip_end_p = false;
}
- fprintf(fp,"%c",toupper(genomicfwd_refdiff[i]));
+ FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
*printp = true;
local_nmismatches += 1;
matchlength = 0;
@@ -1576,11 +1048,11 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
} else {
if (matchlength > 0 || hardclip_end_p == true) {
- fprintf(fp,"%d",matchlength);
+ FPRINTF(fp,"%d",matchlength);
*printp = true;
hardclip_end_p = false;
}
- fprintf(fp,"%c",toupper(genomicfwd_refdiff[i]));
+ FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
*printp = true;
local_nmismatches += 1;
matchlength = 0;
@@ -1596,11 +1068,11 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
} else if (isupper(genomicfwd_bothdiff[i])) {
/* A mismatch against the reference only => alternate variant */
if (matchlength > 0 || hardclip_end_p == true) {
- fprintf(fp,"%d",matchlength);
+ FPRINTF(fp,"%d",matchlength);
*printp = true;
hardclip_end_p = false;
}
- fprintf(fp,"%c",genomicfwd_refdiff[i]); /* Leave as lower case */
+ FPRINTF(fp,"%c",genomicfwd_refdiff[i]); /* Leave as lower case */
*printp = true;
local_nmismatches += 1;
matchlength = 0;
@@ -1608,11 +1080,11 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
} else {
/* A true mismatch against both variants */
if (matchlength > 0 || hardclip_end_p == true) {
- fprintf(fp,"%d",matchlength);
+ FPRINTF(fp,"%d",matchlength);
*printp = true;
hardclip_end_p = false;
}
- fprintf(fp,"%c",toupper(genomicfwd_refdiff[i]));
+ FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
*printp = true;
local_nmismatches += 1;
matchlength = 0;
@@ -1640,7 +1112,7 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
if (lastp == false) {
return matchlength;
} else if (matchlength > 0) {
- fprintf(fp,"%d",matchlength);
+ FPRINTF(fp,"%d",matchlength);
*printp = true;
return 0;
} else {
@@ -1653,7 +1125,7 @@ print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdi
static bool
check_cigar_types (Intlist_T cigar_types) {
Intlist_T p;
- int type, last_type = 'M';
+ int type;
bool M_present_p = false;
for (p = cigar_types; p != NULL; p = Intlist_next(p)) {
@@ -1677,2137 +1149,380 @@ check_cigar_types (Intlist_T cigar_types) {
static void
-print_single (FILE *fp, char *abbrev, Hittype_T hittype, Stage3end_T this, Stage3end_T mate,
- char *acc1, char *acc2, int pathnum, int npaths,
- int absmq_score, int first_absmq, int second_absmq, int mapq_score,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
- Resulttype_T resulttype, bool first_read_p,
- int npaths_mate, int quality_shift,
- char *sam_read_group_id, bool invertp, bool invert_mate_p, bool circularp) {
+print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3end_T mate,
+ char *acc1, char *acc2, int pathnum, int npaths,
+ int absmq_score, int first_absmq, int second_absmq, int mapq_score,
+ Shortread_T queryseq, int pairedlength,
+ Chrpos_T chrpos, Chrpos_T mate_chrpos, int hardclip_low, int hardclip_high,
+ Resulttype_T resulttype, bool first_read_p, int npaths_mate,
+ int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
+ bool circularp) {
unsigned int flag = 0U;
- Substring_T substring;
- int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength, substring_start, substring_length;
- char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
- bool plusp, printp;
+ Substring_T substring, substringL, substringH, substringM;
+ Junction_T post_junction;
+ int type;
+ int nindels = 0;
+
+ List_T substrings_LtoH, junctions_LtoH;
+ List_T startp, endp, startq, prevp, finalp, nextp, p, q;
+ int substring_start, substring_length, matchlength;
- debug(printf("print_single\n"));
+ int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
+ int sensedir;
+ char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
+ char *deletion_string;
+ bool plusp, lastp, printp;
+ bool ambigL, ambigH;
+ int n, i;
+ Univcoord_T *ambcoords, splicecoord;
+#ifdef PRINT_AMBIG_COORDS
+ Univcoord_T chroffset;
+#endif
+
querylength = Shortread_fulllength(queryseq);
- plusp = Stage3end_plusp(this);
- substring = Stage3end_substring1(this);
+ plusp = Stage3end_plusp(stage3end);
- debug(printf("clipdir is %d, hardclip_low %d, hardclip_high %d\n",clipdir,hardclip_low,hardclip_high));
+ if ((sensedir = Stage3end_sensedir(stage3end)) == SENSE_NULL && mate != NULL) {
+ sensedir = Stage3end_sensedir(mate);
+ }
+ /* sensep = (sensedir == SENSE_ANTI) ? false : true; */
/* 1. QNAME */
if (acc2 == NULL) {
- fprintf(fp,"%s",acc1);
+ FPRINTF(fp,"%s",acc1);
} else {
- fprintf(fp,"%s,%s",acc1,acc2);
+ FPRINTF(fp,"%s,%s",acc1,acc2);
}
/* 2. FLAG */
- flag = SAM_compute_flag(Stage3end_plusp(this),mate,resulttype,first_read_p,
+ flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
pathnum,npaths,npaths_mate,absmq_score,first_absmq,
invertp,invert_mate_p);
- fprintf(fp,"\t%u",flag);
+ FPRINTF(fp,"\t%u",flag);
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,Stage3end_chrlength(this),chromosome_iit);
+ print_chromosomal_pos(fp,Stage3end_chrnum(stage3end),chrpos,Stage3end_chrlength(stage3end),chromosome_iit);
/* 5. MAPQ: Mapping quality */
- fprintf(fp,"\t%d",mapq_score);
+ FPRINTF(fp,"\t%d",mapq_score);
/* 6. CIGAR */
- fprintf(fp,"\t");
-
- if (plusp == true) {
- if (hide_soft_clips_p == true && hittype != TERMINAL) {
- print_cigar(fp,/*type*/'M',
- Substring_querystart(substring) + Substring_match_length(substring) +
- (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- print_cigar(fp,/*type*/'S',Substring_querystart(substring),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
+ FPRINTF(fp,"\t");
+ substrings_LtoH = Stage3end_substrings_LtoH(stage3end);
+ junctions_LtoH = Stage3end_junctions_LtoH(stage3end);
+ substringL = (Substring_T) List_head(substrings_LtoH);
+ substringH = (Substring_T) List_last_value(substrings_LtoH);
+ if (Substring_ambiguous_p(substringL) == true) {
+ prevp = substrings_LtoH;
+ startp = List_next(substrings_LtoH);
+ startq = List_next(junctions_LtoH);
} else {
- if (hide_soft_clips_p == true && hittype != TERMINAL) {
- print_cigar(fp,/*type*/'M',
- (querylength - Substring_queryend(substring)) +
- Substring_match_length(substring) + Substring_querystart(substring),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- } else {
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'S',Substring_querystart(substring),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
+ prevp = (List_T) NULL;
+ startp = substrings_LtoH;
+ startq = junctions_LtoH;
}
-
-
- /* 7. MRNM: Mate chr */
- /* 8. MPOS: Mate chrpos */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
- mate_chrpos,Stage3end_chrlength(mate),
- Stage3end_chrnum(this),chrpos,chromosome_iit);
-
-
- /* 9. ISIZE: Insert size */
- if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
- if (plusp == invertp) {
- fprintf(fp,"\t%d",-pairedlength);
- } else {
- fprintf(fp,"\t%d",pairedlength);
- }
- } else if (mate_chrpos == 0) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos < mate_chrpos) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos > mate_chrpos) {
- fprintf(fp,"\t%d",-pairedlength);
- } else if (first_read_p == true) {
- fprintf(fp,"\t%d",pairedlength);
+ if (Substring_ambiguous_p(substringH) == true) {
+ endp = List_last_item(substrings_LtoH);
} else {
- fprintf(fp,"\t%d",-pairedlength);
+ endp = (List_T) NULL;
}
+ debug(printf("End has %d substrings\n",List_length(substrings_LtoH)));
- /* 10. SEQ: queryseq and 11. QUAL: quality scores */
- /* Queryseq has already been inverted, so just measure plusp relative to its current state */
- fprintf(fp,"\t");
+ p = startp;
+ q = startq;
if (plusp == true) {
- Shortread_print_chopped(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
- Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- } else {
- Shortread_print_chopped_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
- Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- }
+ /* Plus */
+ while (p != endp && Substring_queryend((Substring_T) List_head(p)) < hardclip_low) {
+ /* Skip, because substring entirely in hard-clipped region */
+ debug(printf("Skipping %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
+ prevp = p;
+ p = List_next(p);
+ q = List_next(q);
+ }
+
+ substring = (Substring_T) List_head(p);
+ if (List_next(p) == endp || Substring_queryend(substring) >= querylength - hardclip_high) {
+ /* Single substring */
+ debug(printf("Single substring %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
- /* 12. TAGS: RG */
- if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
- }
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',
+ Substring_querystart(substring) + Substring_match_length(substring) +
+ (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+ }
+ finalp = p;
+ nextp = List_next(p);
- /* 12. TAGS: XH */
- if (hardclip_low > 0 || hardclip_high > 0) {
- fprintf(fp,"\tXH:Z:");
- if (plusp == true) {
- Shortread_print_chopped_end(fp,queryseq,hardclip_low,hardclip_high);
} else {
- Shortread_print_chopped_end_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- }
- }
+ /* First substring, plus */
+ debug(printf("First substring, plus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
- /* 12. TAGS: XB */
- Shortread_print_barcode(fp,queryseq);
+ post_junction = (Junction_T) List_head(q);
- /* 12. TAGS: XP. Logically should be last in reconstructing a read. */
- Shortread_print_chop(fp,queryseq,invertp);
-
- /* 12. TAGS: MD */
- fprintf(fp,"\tMD:Z:");
- printp = false;
-
- if (hide_soft_clips_p == true) {
- substring_start = Substring_querystart_orig(substring);
- substring_length = Substring_match_length_orig(substring);
- } else {
- substring_start = Substring_querystart(substring);
- substring_length = Substring_match_length(substring);
- }
-
- if ((genomicdir_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
- if (plusp == true) {
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
-
- } else if (plusp == true) {
- genomicdir_refdiff = Substring_genomic_refdiff(substring);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,&(genomicdir_refdiff[substring_start]),&(genomicdir_bothdiff[substring_start]),
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
-
- } else if ((genomicdir_refdiff = Substring_genomic_refdiff(substring)) == genomicdir_bothdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_refdiff);
-
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
- }
-
- if (printp == false) {
- fprintf(fp,"0");
- }
-
-
- /* 12. TAGS: NH */
- fprintf(fp,"\tNH:i:%d",npaths);
-
- /* 12. TAGS: HI */
- fprintf(fp,"\tHI:i:%d",pathnum);
-
- /* 12. TAGS: NM */
- /* fprintf(fp,"\tNM:i:%d",Stage3end_nmismatches_refdiff(this)); */
- fprintf(fp,"\tNM:i:%d",nmismatches_refdiff);
-
- if (snps_iit) {
- /* 12. TAGS: XW and XV */
- fprintf(fp,"\tXW:i:%d",nmismatches_bothdiff);
- fprintf(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
- }
-
- /* 12. TAGS: SM */
- fprintf(fp,"\tSM:i:%d",mapq_score);
-
- /* 12. TAGS: XQ */
- fprintf(fp,"\tXQ:i:%d",absmq_score);
-
- /* 12. TAGS: X2 */
- fprintf(fp,"\tX2:i:%d",second_absmq);
-
- /* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
-
- /* 12. TAGS: XC */
- if (circularp == true) {
- fprintf(fp,"\tXC:A:+");
- }
-
- /* 12. TAGS: XG */
- if (Stage3end_sarrayp(this) == true) {
- fprintf(fp,"\tXG:Z:A");
- } else if (Stage3end_hittype(this) == TERMINAL) {
- fprintf(fp,"\tXG:Z:T");
- }
-
- fprintf(fp,"\n");
- return;
-}
-
-
-static bool
-check_cigar_single (Hittype_T hittype, Stage3end_T this,
- int querylength, int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, bool circularp) {
- bool result;
- Intlist_T cigar_types = NULL;
- Substring_T substring;
- bool plusp;
-
- plusp = Stage3end_plusp(this);
- substring = Stage3end_substring1(this);
-
- debug1(printf("clipdir is %d, hardclip_low %d, hardclip_high %d\n",clipdir,hardclip_low,hardclip_high));
-
- if (plusp == true) {
- if (hide_soft_clips_p == true && hittype != TERMINAL) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_querystart(substring) + Substring_match_length(substring) +
- (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- if (hide_soft_clips_p == true && hittype != TERMINAL) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- (querylength - Substring_queryend(substring)) +
- Substring_match_length(substring) + Substring_querystart(substring),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring),
- /*querypos*/Substring_queryend(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring),
- /*querypos*/Substring_querystart(substring),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
- }
-
- result = check_cigar_types(cigar_types);
-
- Intlist_free(&cigar_types);
- return result;
-}
-
-
-
-static void
-print_insertion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
- char *acc1, char *acc2, int pathnum, int npaths,
- int absmq_score, int first_absmq, int second_absmq, int mapq_score,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
- Resulttype_T resulttype, bool first_read_p, int npaths_mate,
- int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
- bool circularp) {
- unsigned int flag = 0U;
- Substring_T substring1, substring2;
- int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
- char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
- int substring1_start, substring2_start, substring1_length, substring2_length, matchlength, nindels;
- bool plusp, printp;
- List_T cigar_tokens = NULL;
-
- querylength = Shortread_fulllength(queryseq);
- plusp = Stage3end_plusp(this);
-
- substring1 = Stage3end_substring1(this);
- substring2 = Stage3end_substring2(this);
-
- nindels = Stage3end_nindels(this);
-
- /* 1. QNAME */
- if (acc2 == NULL) {
- fprintf(fp,"%s",acc1);
- } else {
- fprintf(fp,"%s,%s",acc1,acc2);
- }
-
- /* 2. FLAG */
- flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
- pathnum,npaths,npaths_mate,absmq_score,first_absmq,
- invertp,invert_mate_p);
- fprintf(fp,"\t%u",flag);
-
- /* 3. RNAME: chr */
- /* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,Stage3end_chrlength(this),chromosome_iit);
-
-
- /* 5. MAPQ: Mapping quality */
- fprintf(fp,"\t%d",mapq_score);
-
- /* 6. CIGAR */
- fprintf(fp,"\t");
-
- if (plusp == true) {
- if (hide_soft_clips_p == true) {
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'M',
- Substring_querystart(substring1) + Substring_match_length(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- } else {
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- }
-
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'I',nindels,
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
-
- if (hide_soft_clips_p == true) {
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'M',
- Substring_match_length(substring2) + (querylength - Substring_queryend(substring2)),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- } else {
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- if (hide_soft_clips_p == true) {
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'M',
- (querylength - Substring_queryend(substring2)) +
- Substring_match_length(substring2),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- } else {
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- }
-
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'I',nindels,
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
-
- if (hide_soft_clips_p == true) {
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'M',
- Substring_match_length(substring1) +
- Substring_querystart(substring1),
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- } else {
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_tokens = compute_cigar(cigar_tokens,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
- }
- cigar_tokens = Pair_clean_cigar(cigar_tokens,/*watsonp*/true);
- print_tokens_sam(fp,cigar_tokens);
- List_free(&cigar_tokens);
-
-
- /* 7. MRNM: Mate chr */
- /* 8. MPOS: Mate chrpos */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
- mate_chrpos,Stage3end_chrlength(mate),
- Stage3end_chrnum(this),chrpos,chromosome_iit);
-
-
- /* 9. ISIZE: Insert size */
- if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
- if (plusp == invertp) {
- fprintf(fp,"\t%d",-pairedlength);
- } else {
- fprintf(fp,"\t%d",pairedlength);
- }
- } else if (mate_chrpos == 0) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos < mate_chrpos) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos > mate_chrpos) {
- fprintf(fp,"\t%d",-pairedlength);
- } else if (first_read_p == true) {
- fprintf(fp,"\t%d",pairedlength);
- } else {
- fprintf(fp,"\t%d",-pairedlength);
- }
-
- /* 10. SEQ: queryseq and 11. QUAL: quality scores */
- /* Queryseq has already been inverted, so just measure plusp relative to its current state */
- fprintf(fp,"\t");
- if (plusp == true) {
- Shortread_print_chopped(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
- Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- } else {
- Shortread_print_chopped_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
- Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- }
-
- /* 12. TAGS: RG */
- if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
- }
-
- /* 12. TAGS: XH */
- if (hardclip_low > 0 || hardclip_high > 0) {
- fprintf(fp,"\tXH:Z:");
- if (plusp == true) {
- Shortread_print_chopped_end(fp,queryseq,hardclip_low,hardclip_high);
- } else {
- Shortread_print_chopped_end_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- }
- }
-
- /* 12. TAGS: XB */
- Shortread_print_barcode(fp,queryseq);
-
- /* 12. TAGS: XP. Logically should be last in reconstructing a read. */
- Shortread_print_chop(fp,queryseq,invertp);
-
- /* 12. TAGS: MD */
- fprintf(fp,"\tMD:Z:");
- printp = false;
-
- if (hide_soft_clips_p == true) {
- substring1_start = Substring_querystart_orig(substring1);
- substring1_length = Substring_match_length_orig(substring1);
- substring2_start = Substring_querystart_orig(substring2);
- substring2_length = Substring_match_length_orig(substring2);
- } else {
- substring1_start = Substring_querystart(substring1);
- substring1_length = Substring_match_length(substring1);
- substring2_start = Substring_querystart(substring2);
- substring2_length = Substring_match_length(substring2);
- }
-
- if (plusp == true) {
- genomicfwd_refdiff = Substring_genomic_refdiff(substring1);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(substring1);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicfwd_refdiff[substring1_start]),&(genomicfwd_bothdiff[substring1_start]),
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
-
-#if 0
- /* If MD string is supposed to include insertion, then uncomment this */
- matchlength += nindels;
-#endif
-
- genomicfwd_refdiff = Substring_genomic_refdiff(substring2);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(substring2);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
- &(genomicfwd_refdiff[substring2_start]),&(genomicfwd_bothdiff[substring2_start]),
- substring2_length,/*querypos*/substring2_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- genomicdir_refdiff = Substring_genomic_refdiff(substring2);
- genomicdir_bothdiff = Substring_genomic_bothdiff(substring2);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring2_start]),substring2_length);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring2_length,/*querypos*/substring2_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- FREEA(genomicfwd_refdiff);
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring2_start]),substring2_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring2_start]),substring2_length);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring2_length,/*querypos*/substring2_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
- }
-
-#if 0
- /* If MD string is supposed to include insertion, then uncomment this */
- matchlength += nindels;
-#endif
-
- genomicdir_refdiff = Substring_genomic_refdiff(substring1);
- genomicdir_bothdiff = Substring_genomic_bothdiff(substring1);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring1_start]),substring1_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_refdiff);
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring1_start]),substring1_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring1_start]),substring1_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
- }
- }
-
- if (printp == false) {
- fprintf(fp,"0");
- }
-
-
- /* 12. TAGS: NH */
- fprintf(fp,"\tNH:i:%d",npaths);
-
- /* 12. TAGS: HI */
- fprintf(fp,"\tHI:i:%d",pathnum);
-
- /* 12. TAGS: NM */
- /* fprintf(fp,"\tNM:i:%d",Stage3end_nmismatches_refdiff(this)); */
- fprintf(fp,"\tNM:i:%d",nmismatches_refdiff + nindels);
-
- if (snps_iit) {
- /* 12. TAGS: XW and XV */
- fprintf(fp,"\tXW:i:%d",nmismatches_bothdiff);
- fprintf(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
- }
-
- /* 12. TAGS: SM */
- fprintf(fp,"\tSM:i:%d",mapq_score);
-
- /* 12. TAGS: XQ */
- fprintf(fp,"\tXQ:i:%d",absmq_score);
-
- /* 12. TAGS: X2 */
- fprintf(fp,"\tX2:i:%d",second_absmq);
-
- /* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
-
- /* 12. TAGS: XC */
- if (circularp == true) {
- fprintf(fp,"\tXC:A:+");
- }
-
- /* 12. TAGS: XG */
- if (Stage3end_sarrayp(this) == true) {
- fprintf(fp,"\tXG:Z:A");
- }
-
- fprintf(fp,"\n");
- return;
-}
-
-static bool
-check_cigar_insertion (Stage3end_T this, int querylength, int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, bool circularp) {
- bool result;
- Intlist_T cigar_types = NULL;
- Substring_T substring1, substring2;
- bool plusp;
- int nindels;
-
- plusp = Stage3end_plusp(this);
-
- substring1 = Stage3end_substring1(this);
- substring2 = Stage3end_substring2(this);
-
- nindels = Stage3end_nindels(this);
-
- if (plusp == true) {
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_querystart(substring1) + Substring_match_length(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- }
-
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'I',nindels,
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
-
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(substring2) + (querylength - Substring_queryend(substring2)),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- (querylength - Substring_queryend(substring2)) +
- Substring_match_length(substring2),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- }
-
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'I',nindels,
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
-
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(substring1) +
- Substring_querystart(substring1),
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
- }
-
- result = check_cigar_types(cigar_types);
-
- Intlist_free(&cigar_types);
- return result;
-}
-
-
-static void
-print_deletion (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
- char *acc1, char *acc2, int pathnum, int npaths,
- int absmq_score, int first_absmq, int second_absmq, int mapq_score,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
- Resulttype_T resulttype, bool first_read_p, int npaths_mate,
- int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
- bool circularp) {
- unsigned int flag = 0U;
- Substring_T substring1, substring2;
- int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
- char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicfwd_deletion,
- *genomicdir_refdiff, *genomicdir_bothdiff;
- int substring1_start, substring2_start, substring1_length, substring2_length, nindels;
- bool plusp, printp;
-
- querylength = Shortread_fulllength(queryseq);
- plusp = Stage3end_plusp(this);
-
- substring1 = Stage3end_substring1(this);
- substring2 = Stage3end_substring2(this);
-
-
-#if 0
- /* These cases are checked below */
- if (hardclip_low >= Substring_querystart(substring2)) {
- nindels = 0;
- } else if (querylength - hardclip_high <= Substring_queryend(substring1)) {
- nindels = 0;
- } else {
- nindels = Stage3end_nindels(this); /* nindels is positive */
- }
-#else
- nindels = Stage3end_nindels(this); /* nindels is positive */
-#endif
-
-
- /* 1. QNAME */
- if (acc2 == NULL) {
- fprintf(fp,"%s",acc1);
- } else {
- fprintf(fp,"%s,%s",acc1,acc2);
- }
-
- /* 2. FLAG */
- flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
- pathnum,npaths,npaths_mate,absmq_score,first_absmq,
- invertp,invert_mate_p);
- fprintf(fp,"\t%u",flag);
-
- /* 3. RNAME: chr */
- /* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,Stage3end_chrlength(this),chromosome_iit);
-
-
- /* 5. MAPQ: Mapping quality */
- fprintf(fp,"\t%d",mapq_score);
-
- /* 6. CIGAR */
- fprintf(fp,"\t");
-
- if (hide_soft_clips_p == true) {
- substring1_start = Substring_querystart_orig(substring1);
- substring1_length = Substring_match_length_orig(substring1);
- substring2_start = Substring_querystart_orig(substring2);
- substring2_length = Substring_match_length_orig(substring2);
- } else {
- substring1_start = Substring_querystart(substring1);
- substring1_length = Substring_match_length(substring1);
- substring2_start = Substring_querystart(substring2);
- substring2_length = Substring_match_length(substring2);
- }
-
- if (plusp == true) {
- if (hide_soft_clips_p == true) {
- if (/*nindels > 0 &&*/ hardclip_low < substring1_start + substring1_length && hardclip_high < querylength - substring2_start) {
- print_cigar(fp,/*type*/'M',
- Substring_querystart(substring1) +
- Substring_match_length(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- fprintf(fp,"%dD",nindels);
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring2) +
- (querylength - Substring_queryend(substring2)),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- print_cigar(fp,/*type*/'M',
- Substring_querystart(substring1) +
- (Substring_match_length(substring1) +
- Substring_match_length(substring2)) +
- (querylength - Substring_queryend(substring2)),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/true);
- }
-
-
- } else {
- print_cigar(fp,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- if (/*nindels > 0 &&*/ hardclip_low < substring1_start + substring1_length && hardclip_high < querylength - substring2_start) {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- fprintf(fp,"%dD",nindels);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- } else {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring1) + Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- }
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- if (hide_soft_clips_p == true) {
- if (/*nindels > 0 &&*/ hardclip_low < querylength - substring2_start && hardclip_high < substring1_start + substring1_length) {
- print_cigar(fp,/*type*/'M',
- (querylength - Substring_queryend(substring2)) +
- Substring_match_length(substring2),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- fprintf(fp,"%dD",nindels);
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring1) +
- Substring_querystart(substring1),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- } else {
- print_cigar(fp,/*type*/'M',
- (querylength - Substring_queryend(substring2)) +
- (Substring_match_length(substring2) + Substring_match_length(substring1)) +
- Substring_querystart(substring1),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
-
- } else {
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- if (/*nindels > 0 &&*/ hardclip_low < querylength - substring2_start && hardclip_high < substring1_start + substring1_length) {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- fprintf(fp,"%dD",nindels);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- } else {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring2) + Substring_match_length(substring1),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- }
- print_cigar(fp,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
- }
-
- /* 7. MRNM: Mate chr */
- /* 8. MPOS: Mate chrpos */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
- mate_chrpos,Stage3end_chrlength(mate),
- Stage3end_chrnum(this),chrpos,chromosome_iit);
-
-
- /* 9. ISIZE: Insert size */
- if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
- if (plusp == invertp) {
- fprintf(fp,"\t%d",-pairedlength);
- } else {
- fprintf(fp,"\t%d",pairedlength);
- }
- } else if (mate_chrpos == 0) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos < mate_chrpos) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos > mate_chrpos) {
- fprintf(fp,"\t%d",-pairedlength);
- } else if (first_read_p == true) {
- fprintf(fp,"\t%d",pairedlength);
- } else {
- fprintf(fp,"\t%d",-pairedlength);
- }
-
-
- /* 10. SEQ: queryseq and 11. QUAL: quality scores */
- /* Queryseq has already been inverted, so just measure plusp relative to its current state */
- fprintf(fp,"\t");
- if (plusp == true) {
- Shortread_print_chopped(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
- Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- } else {
- Shortread_print_chopped_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
- Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- }
-
- /* 12. TAGS: RG */
- if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
- }
-
- /* 12. TAGS: XH */
- if (hardclip_low > 0 || hardclip_high > 0) {
- fprintf(fp,"\tXH:Z:");
- if (plusp == true) {
- Shortread_print_chopped_end(fp,queryseq,hardclip_low,hardclip_high);
- } else {
- Shortread_print_chopped_end_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- }
- }
-
- /* 12. TAGS: XB */
- Shortread_print_barcode(fp,queryseq);
-
- /* 12. TAGS: XP. Logically should be last in reconstructing a read. */
- Shortread_print_chop(fp,queryseq,invertp);
-
- /* 12. TAGS: MD */
- fprintf(fp,"\tMD:Z:");
- printp = false;
-
- if (plusp == true) {
- genomicfwd_refdiff = Substring_genomic_refdiff(substring1);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(substring1);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicfwd_refdiff[substring1_start]),&(genomicfwd_bothdiff[substring1_start]),
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
-
- debug1(printf("\nhardclip_low %d, hardclip_high %d\n",hardclip_low,hardclip_high));
- debug1(printf("substring1_length %d, substring2_length %d\n",substring1_length,substring2_length));
- debug1(printf("substring1 %d..%d, substring2 %d..%d\n",
- Substring_querystart(substring1),Substring_queryend(substring1),
- Substring_querystart(substring2),Substring_queryend(substring2)));
- debug1(printf("trim1: %d..%d, trim2 %d..%d\n",
- Substring_trim_left(substring1),Substring_trim_right(substring1),
- Substring_trim_left(substring2),Substring_trim_right(substring2)));
- if (hardclip_low < substring1_start + substring1_length && hardclip_high < querylength - substring2_start) {
- fprintf(fp,"^%s",Stage3end_deletion_string(this));
- }
-
- genomicfwd_refdiff = Substring_genomic_refdiff(substring2);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(substring2);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicfwd_refdiff[substring2_start]),&(genomicfwd_bothdiff[substring2_start]),
- substring2_length,/*querypos*/substring2_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
-
- } else {
- genomicdir_refdiff = Substring_genomic_refdiff(substring2);
- genomicdir_bothdiff = Substring_genomic_bothdiff(substring2);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring2_start]),substring2_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring2_length,/*querypos*/substring2_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_refdiff);
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring2_start]),substring2_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring2_start]),substring2_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring2_length,/*querypos*/substring2_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
- }
-
-
- debug1(printf("\nhardclip_low %d, hardclip_high %d\n",hardclip_low,hardclip_high));
- debug1(printf("substring2_length %d, substring1_length %d\n",substring2_length,substring1_length));
- debug1(printf("substring1 %d..%d, substring2 %d..%d\n",
- Substring_querystart(substring1),Substring_queryend(substring1),
- Substring_querystart(substring2),Substring_queryend(substring2)));
- debug1(printf("trim1: %d..%d, trim2 %d..%d\n",
- Substring_trim_left(substring1),Substring_trim_right(substring1),
- Substring_trim_left(substring2),Substring_trim_right(substring2)));
- if (hardclip_low < querylength - substring2_start && hardclip_high < substring1_start + substring1_length) {
- /* Deletion string: Potential problem if followed by a mismatch, but can be resolved by looking at CIGAR string */
- genomicfwd_deletion = (char *) MALLOCA((nindels+1) * sizeof(char));
- make_complement_buffered(genomicfwd_deletion,Stage3end_deletion_string(this),nindels);
- fprintf(fp,"^%s",genomicfwd_deletion);
- FREEA(genomicfwd_deletion);
- }
-
- genomicdir_refdiff = Substring_genomic_refdiff(substring1);
- genomicdir_bothdiff = Substring_genomic_bothdiff(substring1);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring1_start]),substring1_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_refdiff);
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring1_start]),substring1_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring1_start]),substring1_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
- }
-
- }
- if (printp == false) {
- fprintf(fp,"0");
- }
-
-
- /* 12. TAGS: NH */
- fprintf(fp,"\tNH:i:%d",npaths);
-
- /* 12. TAGS: HI */
- fprintf(fp,"\tHI:i:%d",pathnum);
-
- /* 12. TAGS: NM */
- /* fprintf(fp,"\tNM:i:%d",Stage3end_nmismatches_refdiff(this)); */
- fprintf(fp,"\tNM:i:%d",nmismatches_refdiff + nindels);
-
- if (snps_iit) {
- /* 12. TAGS: XW and XV */
- fprintf(fp,"\tXW:i:%d",nmismatches_bothdiff);
- fprintf(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
- }
-
- /* 12. TAGS: SM */
- fprintf(fp,"\tSM:i:%d",mapq_score);
-
- /* 12. TAGS: XQ */
- fprintf(fp,"\tXQ:i:%d",absmq_score);
-
- /* 12. TAGS: X2 */
- fprintf(fp,"\tX2:i:%d",second_absmq);
-
- /* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
-
- /* 12. TAGS: XC */
- if (circularp == true) {
- fprintf(fp,"\tXC:A:+");
- }
-
- /* 12. TAGS: XG */
- if (Stage3end_sarrayp(this) == true) {
- fprintf(fp,"\tXG:Z:A");
- }
-
- fprintf(fp,"\n");
- return;
-}
-
-static bool
-check_cigar_deletion (Stage3end_T this, int querylength, int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, bool circularp) {
- bool result;
- Intlist_T cigar_types = NULL;
- Substring_T substring1, substring2;
- int substring1_start, substring2_start, substring1_length;
- bool plusp;
-
- plusp = Stage3end_plusp(this);
-
- substring1 = Stage3end_substring1(this);
- substring2 = Stage3end_substring2(this);
-
- if (hide_soft_clips_p == true) {
- substring1_start = Substring_querystart_orig(substring1);
- substring1_length = Substring_match_length_orig(substring1);
- substring2_start = Substring_querystart_orig(substring2);
- /* substring2_length = Substring_match_length_orig(substring2); */
- } else {
- substring1_start = Substring_querystart(substring1);
- substring1_length = Substring_match_length(substring1);
- substring2_start = Substring_querystart(substring2);
- /* substring2_length = Substring_match_length(substring2); */
- }
-
- if (plusp == true) {
- if (hide_soft_clips_p == true) {
- if (/*nindels > 0 &&*/ hardclip_low < substring1_start + substring1_length && hardclip_high < querylength - substring2_start) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_querystart(substring1) +
- Substring_match_length(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = Intlist_push(cigar_types,'D');
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(substring2) +
- (querylength - Substring_queryend(substring2)),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_querystart(substring1) +
- (Substring_match_length(substring1) +
- Substring_match_length(substring2)) +
- (querylength - Substring_queryend(substring2)),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/true);
- }
-
-
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- if (/*nindels > 0 &&*/ hardclip_low < substring1_start + substring1_length && hardclip_high < querylength - substring2_start) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_types = Intlist_push(cigar_types,'D');
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1) + Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- }
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- if (hide_soft_clips_p == true) {
- if (/*nindels > 0 &&*/ hardclip_low < querylength - substring2_start && hardclip_high < substring1_start + substring1_length) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- (querylength - Substring_queryend(substring2)) +
- Substring_match_length(substring2),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- cigar_types = Intlist_push(cigar_types,'D');
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(substring1) +
- Substring_querystart(substring1),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- (querylength - Substring_queryend(substring2)) +
- (Substring_match_length(substring2) + Substring_match_length(substring1)) +
- Substring_querystart(substring1),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
-
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- if (/*nindels > 0 &&*/ hardclip_low < querylength - substring2_start && hardclip_high < substring1_start + substring1_length) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = Intlist_push(cigar_types,'D');
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2) + Substring_match_length(substring1),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- }
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
- }
-
- result = check_cigar_types(cigar_types);
-
- Intlist_free(&cigar_types);
- return result;
-}
-
-
-static void
-halfdonor_dinucleotide (char *donor1, char *donor2, Substring_T donor) {
- bool sensep;
- char *genomic;
- int substring_start, substring_length;
-
- /* sensedir for chimera must be SENSE_FORWARD or SENSE_ANTI, not SENSE_NULL */
- sensep = Substring_chimera_sensep(donor);
-
- substring_start = Substring_querystart(donor);
- genomic = Substring_genomic_refdiff(donor);
-
- if (sensep == true) {
- substring_length = Substring_match_length(donor);
- *donor1 = toupper(genomic[substring_start+substring_length]);
- *donor2 = toupper(genomic[substring_start+substring_length+1]);
-
- } else { /* sensep == false */
- *donor2 = toupper(complCode[(int) genomic[substring_start-2]]);
- *donor1 = toupper(complCode[(int) genomic[substring_start-1]]);
- }
-
- return;
-}
-
-static void
-halfacceptor_dinucleotide (char *acceptor2, char *acceptor1, Substring_T acceptor) {
- bool sensep;
- char *genomic;
- int substring_start, substring_length;
-
- /* sensedir for chimera must be SENSE_FORWARD or SENSE_ANTI, not SENSE_NULL */
- sensep = Substring_chimera_sensep(acceptor);
-
- substring_start = Substring_querystart(acceptor);
- genomic = Substring_genomic_refdiff(acceptor);
-
- if (sensep == true) {
- *acceptor2 = toupper(genomic[substring_start-2]);
- *acceptor1 = toupper(genomic[substring_start-1]);
-
- } else { /* sensep == false */
- substring_length = Substring_match_length(acceptor);
- *acceptor1 = toupper(complCode[(int) genomic[substring_start+substring_length]]);
- *acceptor2 = toupper(complCode[(int) genomic[substring_start+substring_length+1]]);
- }
-
- return;
-}
-
-
-
-static void
-print_halfdonor (FILE *fp, char *abbrev, Substring_T donor, Stage3end_T this, Stage3end_T mate,
- char *acc1, char *acc2, int pathnum, int npaths, int absmq_score, int first_absmq, int second_absmq, int mapq_score,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T concordant_chrpos, Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos, Chrpos_T mate_chrpos,
- int clipdir, int hardclip_low, int hardclip_high, Resulttype_T resulttype, bool first_read_p, int npaths_mate,
- int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
- bool use_hardclip_p, bool print_xt_p, char donor_strand, char acceptor_strand,
- char *donor_chr, char *acceptor_chr, char donor1, char donor2, char acceptor2, char acceptor1,
- double donor_prob, double acceptor_prob, bool circularp) {
- unsigned int flag = 0U;
- int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
- bool sensep;
- char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
- int substring_start, substring_length;
- int transloc_hardclip_low, transloc_hardclip_high;
- bool plusp, printp;
- bool start_ambig, end_ambig;
- int amb_length_start, amb_length_end;
- int n, i;
- Univcoord_T *start_ambcoords, *end_ambcoords, splicecoord;
-#ifdef PRINT_AMBIG_COORDS
- Univcoord_T chroffset;
-#endif
-
-
- querylength = Shortread_fulllength(queryseq);
- plusp = Substring_plusp(donor);
-
- /* 1. QNAME */
- if (acc2 == NULL) {
- fprintf(fp,"%s",acc1);
- } else {
- fprintf(fp,"%s,%s",acc1,acc2);
- }
-
- /* 2. FLAG */
- flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
- pathnum,npaths,npaths_mate,absmq_score,first_absmq,
- invertp,invert_mate_p);
- fprintf(fp,"\t%u",flag);
-
- /* 3. RNAME: chr */
- /* 4. POS: chrpos */
- print_chromosomal_pos(fp,Substring_chrnum(donor),donor_chrpos,Substring_chrlength(donor),chromosome_iit);
-
-
- /* 5. MAPQ: Mapping quality */
- fprintf(fp,"\t%d",mapq_score);
-
- /* 6. CIGAR */
- fprintf(fp,"\t");
- /* sensedir for chimera must be SENSE_FORWARD or SENSE_ANTI, not SENSE_NULL */
- /* sensedir = Substring_chimera_sensedir(donor); */
- sensep = Substring_chimera_sensep(donor);
-
- if (use_hardclip_p == true) {
- if (sensep == plusp) {
- transloc_hardclip_low = 0;
- if (plusp == true) {
- /* sensep true */
- transloc_hardclip_high = querylength - Substring_queryend(donor);
-
- } else {
- /* sensep false */
- transloc_hardclip_high = Substring_querystart(donor);
- }
-
- } else { /* sensep != Substring_plusp(donor) */
- transloc_hardclip_high = 0;
- if (plusp == true) {
- transloc_hardclip_low = Substring_querystart(donor);
-
- } else {
- transloc_hardclip_low = querylength - Substring_queryend(donor);
- }
- }
-
- if (transloc_hardclip_low > hardclip_low) {
- hardclip_low = transloc_hardclip_low;
- }
- if (transloc_hardclip_high > hardclip_high) {
- hardclip_high = transloc_hardclip_high;
- }
- }
-
-
- if (sensep == plusp) {
- if (plusp == true) {
- /* sensep true */
- assert(Substring_chimera_pos(donor) == Substring_queryend(donor));
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_querystart(donor) +
- Substring_match_length(donor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
-
- } else {
- print_cigar(fp,/*type*/'S',Substring_querystart(donor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(donor),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- /* sensep false */
- assert(Substring_chimera_pos(donor) == Substring_querystart(donor));
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- (querylength - Substring_queryend(donor)) +
- Substring_match_length(donor),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
-
- } else {
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(donor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
- }
-
- } else { /* sensep != Substring_plusp(donor) */
- if (plusp == true) {
- assert(Substring_chimera_pos(donor) == Substring_querystart(donor));
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'M',
- Substring_match_length(donor) +
- (querylength - Substring_queryend(donor)),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(donor),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- assert(Substring_chimera_pos(donor) == Substring_queryend(donor));
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'M',
- Substring_match_length(donor) +
- Substring_querystart(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- } else {
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'S',Substring_querystart(donor),
- /*querypos*/Substring_querystart(donor),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- }
- }
- }
-
- /* 7. MRNM: Mate chr */
- /* 8. MPOS: Mate chrpos */
- /* For anchor_chrnum, previously used Stage3end_chrnum(this), but this is 0 */
- print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
- mate_chrpos,Stage3end_chrlength(mate),
- /*anchor_chrnum*/Substring_chrnum(donor),donor_chrpos,chromosome_iit);
-
-
- /* 9. ISIZE: Insert size */
- if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
- if (plusp == invertp) {
- fprintf(fp,"\t%d",-pairedlength);
- } else {
- fprintf(fp,"\t%d",pairedlength);
- }
- } else if (mate_chrpos == 0) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (concordant_chrpos < mate_chrpos) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (concordant_chrpos > mate_chrpos) {
- fprintf(fp,"\t%d",-pairedlength);
- } else if (first_read_p == true) {
- fprintf(fp,"\t%d",pairedlength);
- } else {
- fprintf(fp,"\t%d",-pairedlength);
- }
-
-
- /* 10. SEQ: queryseq and 11. QUAL: quality scores */
- /* Queryseq has already been inverted, so just measure plusp relative to its current state */
- fprintf(fp,"\t");
- if (plusp == true) {
- Shortread_print_chopped(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
- Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- } else {
- Shortread_print_chopped_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
- Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- }
-
-
- /* 12. TAGS: RG */
- if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
- }
-
- /* 12. TAGS: XH */
- if (hardclip_low > 0 || hardclip_high > 0) {
- fprintf(fp,"\tXH:Z:");
- if (plusp == true) {
- Shortread_print_chopped_end(fp,queryseq,hardclip_low,hardclip_high);
- } else {
- Shortread_print_chopped_end_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- }
- }
-
- /* 12. TAGS: XB */
- Shortread_print_barcode(fp,queryseq);
-
- /* 12. TAGS: XP. Logically should be last in reconstructing a read. */
- Shortread_print_chop(fp,queryseq,invertp);
-
- /* 12. TAGS: MD */
- fprintf(fp,"\tMD:Z:");
- printp = false;
-
- if (hide_soft_clips_p == true) {
- substring_start = Substring_querystart_orig(donor);
- substring_length = Substring_match_length_orig(donor);
- } else {
- substring_start = Substring_querystart(donor);
- substring_length = Substring_match_length(donor);
- }
-
- if (use_hardclip_p == false) {
- genomicdir_refdiff = Substring_genomic_refdiff(donor);
- genomicdir_bothdiff = Substring_genomic_bothdiff(donor);
- if (plusp == true) {
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicdir_refdiff[substring_start]),&(genomicdir_bothdiff[substring_start]),
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_refdiff);
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
- }
-
- } else if (sensep == true) {
- if (plusp == true) {
- genomicfwd_refdiff = Substring_genomic_refdiff(donor);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(donor);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- genomicdir_refdiff = Substring_genomic_refdiff(donor);
- genomicdir_bothdiff = Substring_genomic_bothdiff(donor);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_refdiff);
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
- }
- }
-
- } else { /* sensep == false */
- if (plusp == true) {
- genomicfwd_refdiff = Substring_genomic_refdiff(donor);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(donor);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- genomicdir_refdiff = Substring_genomic_refdiff(donor);
- genomicdir_bothdiff = Substring_genomic_refdiff(donor);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_refdiff);
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
- }
- }
- }
- if (printp == false) {
- fprintf(fp,"0");
- }
-
-
- /* 12. TAGS: NH */
- fprintf(fp,"\tNH:i:%d",npaths);
-
- /* 12. TAGS: HI */
- fprintf(fp,"\tHI:i:%d",pathnum);
-
- /* 12. TAGS: NM */
- /* fprintf(fp,"\tNM:i:%d",Substring_nmismatches_refdiff(donor)); */
- fprintf(fp,"\tNM:i:%d",nmismatches_refdiff);
-
- if (snps_iit) {
- /* 12. TAGS: XW and XV */
- fprintf(fp,"\tXW:i:%d",nmismatches_bothdiff);
- fprintf(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
- }
-
- /* 12. TAGS: SM */
- fprintf(fp,"\tSM:i:%d",mapq_score);
-
- /* 12. TAGS: XQ */
- fprintf(fp,"\tXQ:i:%d",absmq_score);
-
- /* 12. TAGS: X2 */
- fprintf(fp,"\tX2:i:%d",second_absmq);
-
- /* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
-
- /* 12. TAGS: XS */
-#if 0
- /* Not necessary to compute, because already computed by print_exon_exon */
- /* sensedir for chimera must be SENSE_FORWARD or SENSE_ANTI, not SENSE_NULL */
- if (sensedir == SENSE_FORWARD) {
- if (plusp == true) {
- fprintf(fp,"\tXS:A:+");
- } else {
- fprintf(fp,"\tXS:A:-");
- }
- } else if (sensedir == SENSE_ANTI) {
- if (plusp == true) {
- fprintf(fp,"\tXS:A:-");
- } else {
- fprintf(fp,"\tXS:A:+");
- }
- } else if (force_xs_direction_p == true) {
- fprintf(fp,"\tXS:A:+");
- } else {
- fprintf(fp,"\tXS:A:?");
- }
-#else
- fprintf(fp,"\tXS:A:%c",donor_strand);
-#endif
-
-
-
- /* 12. TAGS: XA */
- if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
- (end_ambig = Stage3end_end_ambiguous_p(this)) == true) {
- fprintf(fp,"\tXA:Z:");
-
- if (plusp == true) {
- if ((n = Stage3end_start_nambcoords(this)) > 0) {
- assert(sensep == false);
- start_ambcoords = Stage3end_start_ambcoords(this);
- splicecoord = Substring_alignstart(donor);
-#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(donor);
- fprintf(fp,"%u",start_ambcoords[0] - chroffset + 1U);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",start_ambcoords[i] - chroffset + 1U);
- }
-#else
- splicecoord = Substring_alignstart(donor);
- fprintf(fp,"%u",splicecoord - start_ambcoords[0]);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",splicecoord - start_ambcoords[i]);
- }
-#endif
- }
- fprintf(fp,"|");
- if ((n = Stage3end_end_nambcoords(this)) > 0) {
- assert(sensep == true);
- end_ambcoords = Stage3end_end_ambcoords(this);
-#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(donor);
- fprintf(fp,"%u",end_ambcoords[0] - chroffset + 1U);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",end_ambcoords[i] - chroffset + 1U);
- }
-#else
- splicecoord = Substring_alignend(donor);
- fprintf(fp,"%u",end_ambcoords[0] - splicecoord);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",end_ambcoords[i] - splicecoord);
- }
-#endif
- }
-
- } else {
- if ((n = Stage3end_end_nambcoords(this)) > 0) {
- assert(sensep == true);
- end_ambcoords = Stage3end_end_ambcoords(this);
-#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(donor);
- fprintf(fp,"%u",end_ambcoords[0] - chroffset + 1U);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",end_ambcoords[i] - chroffset + 1U);
- }
-#else
- splicecoord = Substring_alignend(donor);
- fprintf(fp,"%u",splicecoord - end_ambcoords[0]);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",splicecoord - end_ambcoords[i]);
- }
-#endif
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',
+ Substring_querystart(substring) +
+ Substring_match_length(substring),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
}
- fprintf(fp,"|");
- if ((n = Stage3end_start_nambcoords(this)) > 0) {
- assert(sensep == false);
- start_ambcoords = Stage3end_start_ambcoords(this);
-#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(donor);
- fprintf(fp,"%u",start_ambcoords[0] - chroffset + 1U);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",start_ambcoords[i] - chroffset + 1U);
+ p = List_next(p);
+
+ while (p != endp && Substring_queryend((Substring_T) List_head(p)) < querylength - hardclip_high) {
+ if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+ FPRINTF(fp,"%dD",Junction_nindels(post_junction));
+ nindels += Junction_nindels(post_junction);
+ } else if (type == INS_JUNCTION) {
+ FPRINTF(fp,"%dI",Junction_nindels(post_junction));
+ nindels += Junction_nindels(post_junction);
+ } else if (type == SPLICE_JUNCTION) {
+ FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
}
-#else
- splicecoord = Substring_alignstart(donor);
- fprintf(fp,"%u",start_ambcoords[0] - splicecoord);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",start_ambcoords[i] - splicecoord);
+ q = List_next(q);
+ if (q == NULL) {
+ } else {
+ post_junction = (Junction_T) List_head(q);
}
-#endif
- }
- }
- }
-
- /* 12. TAGS: XT */
- if (print_xt_p == true) {
- fprintf(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
- fprintf(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_chrpos,acceptor_strand,acceptor_chr,acceptor_chrpos);
- }
-
- /* 12. TAGS: XC */
- if (circularp == true) {
- fprintf(fp,"\tXC:A:+");
- }
-
- /* 12. TAGS: XG */
- if (Stage3end_sarrayp(this) == true) {
- fprintf(fp,"\tXG:Z:A");
- }
-
- fprintf(fp,"\n");
- return;
-}
-
-
-static bool
-check_cigar_halfdonor (Substring_T donor, int querylength, int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, bool circularp) {
- bool result;
- Intlist_T cigar_types = NULL;
- bool plusp, sensep;
- bool use_hardclip_p = false;
- int transloc_hardclip_low, transloc_hardclip_high;
-
- plusp = Substring_plusp(donor);
-
- sensep = Substring_chimera_sensep(donor);
-
- if (use_hardclip_p == true) {
- if (sensep == plusp) {
- transloc_hardclip_low = 0;
- if (plusp == true) {
- /* sensep true */
- transloc_hardclip_high = querylength - Substring_queryend(donor);
-
- } else {
- /* sensep false */
- transloc_hardclip_high = Substring_querystart(donor);
- }
-
- } else { /* sensep != Substring_plusp(donor) */
- transloc_hardclip_high = 0;
- if (plusp == true) {
- transloc_hardclip_low = Substring_querystart(donor);
-
- } else {
- transloc_hardclip_low = querylength - Substring_queryend(donor);
- }
- }
-
- if (transloc_hardclip_low > hardclip_low) {
- hardclip_low = transloc_hardclip_low;
- }
- if (transloc_hardclip_high > hardclip_high) {
- hardclip_high = transloc_hardclip_high;
- }
- }
-
- if (sensep == plusp) {
- if (plusp == true) {
- /* sensep true */
- assert(Substring_chimera_pos(donor) == Substring_queryend(donor));
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_querystart(donor) +
- Substring_match_length(donor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
-
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(donor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(donor),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
+ substring = (Substring_T) List_head(p);
+ if (List_next(p) == endp) {
+ /* Last substring, plus, not hard-clipped */
+ debug(printf("Last substring, plus, not hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
+
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',
+ Substring_match_length(substring) +
+ (querylength - Substring_queryend(substring)),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+ }
+ finalp = p;
+ nextp = List_next(p);
- } else {
- /* sensep false */
- assert(Substring_chimera_pos(donor) == Substring_querystart(donor));
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- (querylength - Substring_queryend(donor)) +
- Substring_match_length(donor),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else {
+ /* Middle substring, plus */
+ debug(printf("Middle substring, plus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(donor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ }
+ p = List_next(p);
}
- }
+
+ if (p != endp) {
+ if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+ FPRINTF(fp,"%dD",Junction_nindels(post_junction));
+ nindels += Junction_nindels(post_junction);
+ } else if (type == INS_JUNCTION) {
+ FPRINTF(fp,"%dI",Junction_nindels(post_junction));
+ nindels += Junction_nindels(post_junction);
+ } else if (type == SPLICE_JUNCTION) {
+ FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
+ }
- } else { /* sensep != Substring_plusp(donor) */
- if (plusp == true) {
- assert(Substring_chimera_pos(donor) == Substring_querystart(donor));
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(donor) +
- (querylength - Substring_queryend(donor)),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(donor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(donor),
- /*querypos*/Substring_querystart(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
+ /* Last substring, plus, hard-clipped */
+ substring = (Substring_T) List_head(p);
+ debug(printf("Last substring, plus, hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',
+ Substring_match_length(substring) +
+ (querylength - Substring_queryend(substring)),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+ }
+ finalp = p;
+ nextp = List_next(p);
- } else {
- assert(Substring_chimera_pos(donor) == Substring_queryend(donor));
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(donor) +
- Substring_querystart(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(donor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(donor),
- /*querypos*/Substring_queryend(donor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(donor),
- /*querypos*/Substring_querystart(donor),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
}
}
- }
-
- result = check_cigar_types(cigar_types);
-
- Intlist_free(&cigar_types);
- return result;
-}
-
-
-static void
-print_halfacceptor (FILE *fp, char *abbrev, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
- char *acc1, char *acc2, int pathnum, int npaths, int absmq_score, int first_absmq, int second_absmq, int mapq_score,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T concordant_chrpos, Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos, Chrpos_T mate_chrpos,
- int clipdir, int hardclip_low, int hardclip_high, Resulttype_T resulttype, bool first_read_p, int npaths_mate,
- int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
- bool use_hardclip_p, bool print_xt_p, char donor_strand, char acceptor_strand,
- char *donor_chr, char *acceptor_chr, char donor1, char donor2, char acceptor2, char acceptor1,
- double donor_prob, double acceptor_prob, bool circularp) {
- unsigned int flag = 0U;
- int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
- bool sensep;
- char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
- int substring_start, substring_length;
- int transloc_hardclip_low, transloc_hardclip_high;
- bool plusp, printp;
- bool start_ambig, end_ambig;
- int amb_length_start, amb_length_end;
- int n, i;
- Univcoord_T *start_ambcoords, *end_ambcoords, splicecoord;
-#ifdef PRINT_AMBIG_COORDS
- Univcoord_T chroffset;
-#endif
-
-
- querylength = Shortread_fulllength(queryseq);
- plusp = Substring_plusp(acceptor);
-
- /* 1. QNAME */
- if (acc2 == NULL) {
- fprintf(fp,"%s",acc1);
} else {
- fprintf(fp,"%s,%s",acc1,acc2);
- }
-
- /* 2. FLAG */
- flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
- pathnum,npaths,npaths_mate,absmq_score,first_absmq,
- invertp,invert_mate_p);
- fprintf(fp,"\t%u",flag);
-
- /* 3. RNAME: chr */
- /* 4. POS: chrpos */
- print_chromosomal_pos(fp,Substring_chrnum(acceptor),acceptor_chrpos,Substring_chrlength(acceptor),chromosome_iit);
-
-
- /* 5. MAPQ: Mapping quality */
- fprintf(fp,"\t%d",mapq_score);
-
- /* 6. CIGAR */
- fprintf(fp,"\t");
- /* sensedir for chimera must be SENSE_FORWARD or SENSE_ANTI, not SENSE_NULL */
- /* sensedir = Substring_chimera_sensedir(acceptor); */
- sensep = Substring_chimera_sensep(acceptor);
-
- if (use_hardclip_p == true) {
- if (sensep != plusp) {
- transloc_hardclip_low = 0;
- if (plusp == true) {
- /* sensep false */
- transloc_hardclip_high = querylength - Substring_queryend(acceptor);
-
- } else {
- /* sensep true */
- transloc_hardclip_high = Substring_querystart(acceptor);
- }
-
- } else { /* sensep == Substring_plusp(acceptor) */
- transloc_hardclip_high = 0;
- if (plusp == true) {
- transloc_hardclip_low = Substring_querystart(acceptor);
-
- } else {
- transloc_hardclip_low = querylength - Substring_queryend(acceptor);
- }
- }
-
- if (transloc_hardclip_low > hardclip_low) {
- hardclip_low = transloc_hardclip_low;
- }
- if (transloc_hardclip_high > hardclip_high) {
- hardclip_high = transloc_hardclip_high;
- }
- }
-
- if (sensep != plusp) {
- if (plusp == true) {
- /* sensep false */
- assert(Substring_chimera_pos(acceptor) == Substring_queryend(acceptor));
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_querystart(acceptor) +
- Substring_match_length(acceptor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- print_cigar(fp,/*type*/'S',Substring_querystart(acceptor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- /* sensep true */
- assert(Substring_chimera_pos(acceptor) == Substring_querystart(acceptor));
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- (querylength - Substring_queryend(acceptor)) +
- Substring_match_length(acceptor),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- } else {
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(acceptor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- }
- }
+ /* Minus */
+ while (p != endp && Substring_querystart((Substring_T) List_head(p)) >= querylength - hardclip_low) {
+ /* Skip, because substring entirely in hard-clipped region */
+ debug(printf("Skipping %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
+ prevp = p;
+ p = List_next(p);
+ q = List_next(q);
+ }
+
+ substring = (Substring_T) List_head(p);
+ if (List_next(p) == endp || Substring_querystart(substring) < hardclip_high) {
+ /* Single substring */
+ debug(printf("Single substring %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
- } else { /* sensep == Substring_plusp(acceptor) */
- if (plusp == true) {
- assert(Substring_chimera_pos(acceptor) == Substring_querystart(acceptor));
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
print_cigar(fp,/*type*/'M',
- Substring_match_length(acceptor) +
- (querylength - Substring_queryend(acceptor)),
- /*querypos*/Substring_querystart(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ (querylength - Substring_queryend(substring)) +
+ Substring_match_length(substring) + Substring_querystart(substring),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true,/*trimlength*/0);
} else {
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+ /*querypos*/Substring_querystart(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
}
+ finalp = p;
+ nextp = List_next(p);
} else {
- assert(Substring_chimera_pos(acceptor) == Substring_queryend(acceptor));
+ /* First substring, minus */
+ debug(printf("First substring, minus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
+
+ post_junction = (Junction_T) List_head(q);
+
if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
print_cigar(fp,/*type*/'M',
- Substring_match_length(acceptor) +
- Substring_querystart(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ (querylength - Substring_queryend(substring)) +
+ Substring_match_length(substring),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false,/*trimlength*/0);
} else {
- print_cigar(fp,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
+ print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
/*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'S',Substring_querystart(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ }
+ p = List_next(p);
+
+ while (p != endp && Substring_querystart((Substring_T) List_head(p)) >= hardclip_high) {
+ if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+ FPRINTF(fp,"%dD",Junction_nindels(post_junction));
+ nindels += Junction_nindels(post_junction);
+ } else if (type == INS_JUNCTION) {
+ FPRINTF(fp,"%dI",Junction_nindels(post_junction));
+ nindels += Junction_nindels(post_junction);
+ } else if (type == SPLICE_JUNCTION) {
+ FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
+ }
+ q = List_next(q);
+ if (q == NULL) {
+ } else {
+ post_junction = (Junction_T) List_head(q);
+ }
+
+ substring = (Substring_T) List_head(p);
+ if (List_next(p) == endp) {
+ /* Last substring, minus, not hard-clipped */
+ debug(printf("Last substring, minus, not hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
+
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',
+ Substring_match_length(substring) +
+ Substring_querystart(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+ /*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true,/*trimlength*/0);
+ }
+ finalp = p;
+ nextp = List_next(p);
+
+ } else {
+ /* Middle substring, minus */
+ debug(printf("Middle substring, minus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
+
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ }
+ p = List_next(p);
+ }
+
+ if (p != endp) {
+ if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+ FPRINTF(fp,"%dD",Junction_nindels(post_junction));
+ nindels += Junction_nindels(post_junction);
+ } else if (type == INS_JUNCTION) {
+ FPRINTF(fp,"%dI",Junction_nindels(post_junction));
+ nindels += Junction_nindels(post_junction);
+ } else if (type == SPLICE_JUNCTION) {
+ FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
+ }
+
+ /* Last substring, minus, hard-clipped */
+ substring = (Substring_T) List_head(p);
+ debug(printf("Last substring, minus, hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+ Substring_queryend((Substring_T) List_head(p))));
+
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',
+ Substring_match_length(substring) +
+ Substring_querystart(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'M',Substring_match_length(substring),
+ /*querypos*/Substring_queryend(substring),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+ /*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true,/*trimlength*/0);
+ }
+ finalp = p;
+ nextp = List_next(p);
+
}
}
}
@@ -3815,61 +1530,67 @@ print_halfacceptor (FILE *fp, char *abbrev, Substring_T acceptor, Stage3end_T th
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
- /* For anchor_chrnum, previously used Stage3end_chrnum(this), but this is 0 */
print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
mate_chrpos,Stage3end_chrlength(mate),
- /*anchor_chrnum*/Substring_chrnum(acceptor),acceptor_chrpos,chromosome_iit);
+ Stage3end_chrnum(stage3end),chrpos,chromosome_iit);
/* 9. ISIZE: Insert size */
if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
if (plusp == invertp) {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",-pairedlength);
} else {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
}
} else if (mate_chrpos == 0) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (concordant_chrpos < mate_chrpos) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (concordant_chrpos > mate_chrpos) {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
+ } else if (chrpos < mate_chrpos) {
+ FPRINTF(fp,"\t%d",pairedlength);
+ } else if (chrpos > mate_chrpos) {
+ FPRINTF(fp,"\t%d",-pairedlength);
} else if (first_read_p == true) {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
} else {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",-pairedlength);
}
/* 10. SEQ: queryseq and 11. QUAL: quality scores */
/* Queryseq has already been inverted, so just measure plusp relative to its current state */
- fprintf(fp,"\t");
if (plusp == true) {
- Shortread_print_chopped(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
+ Shortread_print_chopped_sam(fp,queryseq,hardclip_low,hardclip_high);
+ FPRINTF(fp,"\t");
Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
+ quality_shift,/*show_chopped_p*/false);
} else {
- Shortread_print_chopped_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
+ Shortread_print_chopped_revcomp_sam(fp,queryseq,hardclip_low,hardclip_high);
+ FPRINTF(fp,"\t");
Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- }
-
+ quality_shift,/*show_chopped_p*/false);
+ }
/* 12. TAGS: RG */
if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
+ FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
}
- /* 12. TAGS: XH */
+ /* 12. TAGS: XH and XI */
if (hardclip_low > 0 || hardclip_high > 0) {
- fprintf(fp,"\tXH:Z:");
+ FPRINTF(fp,"\tXH:Z:");
if (plusp == true) {
Shortread_print_chopped_end(fp,queryseq,hardclip_low,hardclip_high);
} else {
Shortread_print_chopped_end_revcomp(fp,queryseq,hardclip_low,hardclip_high);
}
+
+ if (Shortread_quality_string(queryseq) != NULL) {
+ FPRINTF(fp,"\tXI:Z:");
+ if (plusp == true) {
+ Shortread_print_chopped_end_quality(fp,queryseq,hardclip_low,hardclip_high);
+ } else {
+ Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_low,hardclip_high);
+ }
+ }
}
/* 12. TAGS: XB */
@@ -3879,614 +1600,827 @@ print_halfacceptor (FILE *fp, char *abbrev, Substring_T acceptor, Stage3end_T th
Shortread_print_chop(fp,queryseq,invertp);
/* 12. TAGS: MD */
- fprintf(fp,"\tMD:Z:");
+ FPRINTF(fp,"\tMD:Z:");
+ p = startp;
+ q = startq;
printp = false;
- if (hide_soft_clips_p == true) {
- substring_start = Substring_querystart_orig(acceptor);
- substring_length = Substring_match_length_orig(acceptor);
- } else {
- substring_start = Substring_querystart(acceptor);
- substring_length = Substring_match_length(acceptor);
- }
+ if (plusp == true) {
+ /* Plus */
+ while (p != endp && Substring_queryend((Substring_T) List_head(p)) < hardclip_low) {
+ /* Skip, because substring entirely in hard-clipped region */
+ p = List_next(p);
+ q = List_next(q);
+ }
- if (use_hardclip_p == false) {
- genomicdir_refdiff = Substring_genomic_refdiff(acceptor);
- genomicdir_bothdiff = Substring_genomic_bothdiff(acceptor);
- if (plusp == true) {
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicdir_refdiff[substring_start]),&(genomicdir_bothdiff[substring_start]),
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_refdiff);
+ substring = (Substring_T) List_head(p);
+ if (List_next(p) == endp || Substring_queryend(substring) >= querylength - hardclip_high) {
+ /* Single substring */
+ if (hide_soft_clips_p == true) {
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ } else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ }
+
+ if ((genomicfwd_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ /*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ genomicfwd_refdiff = Substring_genomic_refdiff(substring);
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+
} else {
- genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
+ /* First substring, plus */
+ if (hide_soft_clips_p == true) {
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ } else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ }
+
+ post_junction = (Junction_T) List_head(q);
+ if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+ lastp = true;
+ } else {
+ lastp = false;
+ }
+
+ if ((genomicfwd_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ /*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,lastp);
+ } else {
+ genomicfwd_refdiff = Substring_genomic_refdiff(substring);
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,lastp);
+ }
+ p = List_next(p);
+
+ while (p != endp && Substring_queryend((Substring_T) List_head(p)) < querylength - hardclip_high) {
+ if (type == DEL_JUNCTION) {
+ deletion_string = Junction_deletion_string(post_junction,genome,/*plusp*/true);
+ FPRINTF(fp,"^%s",deletion_string);
+ FREE(deletion_string);
+ }
+ q = List_next(q);
+ if (q == NULL) {
+ lastp = true;
+ } else {
+ post_junction = (Junction_T) List_head(q);
+ if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+ lastp = true;
+ } else {
+ lastp = false;
+ }
+ }
+
+ substring = (Substring_T) List_head(p);
+ if (List_next(p) == endp) {
+ /* Last substring, plus, not hard-clipped */
+ if (hide_soft_clips_p == true) {
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ } else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ }
+
+ if ((genomicfwd_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
+ /*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ genomicfwd_refdiff = Substring_genomic_refdiff(substring);
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
+ &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+
+ } else {
+ /* Middle substring, plus */
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+
+ if ((genomicfwd_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
+ /*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,lastp);
+ } else {
+ genomicfwd_refdiff = Substring_genomic_refdiff(substring);
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
+ &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,lastp);
+ }
+ }
+ p = List_next(p);
+ }
+
+ if (p != endp) {
+ if (type == DEL_JUNCTION) {
+ deletion_string = Junction_deletion_string(post_junction,genome,/*plusp*/true);
+ FPRINTF(fp,"^%s",deletion_string);
+ FREE(deletion_string);
+ }
+
+ /* Last substring, plus, hard-clipped */
+ substring = (Substring_T) List_head(p);
+ if (hide_soft_clips_p == true) {
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ } else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ }
+
+ if ((genomicfwd_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
+ /*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ genomicfwd_refdiff = Substring_genomic_refdiff(substring);
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
+ &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ }
+ }
}
- } else if (sensep == false) {
- if (plusp == true) {
- genomicfwd_refdiff = Substring_genomic_refdiff(acceptor);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(acceptor);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- genomicdir_refdiff = Substring_genomic_refdiff(acceptor);
- genomicdir_bothdiff = Substring_genomic_bothdiff(acceptor);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
+ } else {
+ /* Minus */
+ while (p != endp && Substring_querystart((Substring_T) List_head(p)) >= querylength - hardclip_low) {
+ /* Skip, because substring entirely in hard-clipped region */
+ p = List_next(p);
+ q = List_next(q);
+ }
+
+ substring = (Substring_T) List_head(p);
+ if (List_next(p) == endp || querylength - Substring_queryend(substring) >= querylength - hardclip_high) {
+ /* Single substring */
+ if (hide_soft_clips_p == true) {
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ } else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ }
+
+ if ((genomicdir_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else if ((genomicdir_refdiff = Substring_genomic_refdiff(substring)) == genomicdir_bothdiff) {
genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
FREEA(genomicfwd_refdiff);
} else {
genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
FREEA(genomicfwd_bothdiff);
FREEA(genomicfwd_refdiff);
}
- }
-
- } else { /* sensep true */
- if (plusp == true) {
- genomicfwd_refdiff = Substring_genomic_refdiff(acceptor);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(acceptor);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
} else {
- genomicdir_refdiff = Substring_genomic_refdiff(acceptor);
- genomicdir_bothdiff = Substring_genomic_bothdiff(acceptor);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
+ /* First substring, minus */
+ if (hide_soft_clips_p == true) {
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ } else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ }
+
+ post_junction = (Junction_T) List_head(q);
+ if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+ lastp = true;
+ } else {
+ lastp = false;
+ }
+
+ if ((genomicdir_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,lastp);
+ } else if ((genomicdir_refdiff = Substring_genomic_refdiff(substring)) == genomicdir_bothdiff) {
genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,lastp);
FREEA(genomicfwd_refdiff);
} else {
genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring_length,/*querypos*/substring_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,lastp);
FREEA(genomicfwd_bothdiff);
FREEA(genomicfwd_refdiff);
}
+ p = List_next(p);
+
+ while (p != endp && querylength - Substring_queryend((Substring_T) List_head(p)) < querylength - hardclip_high) {
+ if (type == DEL_JUNCTION) {
+ deletion_string = Junction_deletion_string(post_junction,genome,/*plusp:true*/true);
+ FPRINTF(fp,"^%s",deletion_string);
+ FREE(deletion_string);
+ }
+ q = List_next(q);
+ if (q == NULL) {
+ lastp = true;
+ } else {
+ post_junction = (Junction_T) List_head(q);
+ if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+ lastp = true;
+ } else {
+ lastp = false;
+ }
+ }
+
+ substring = (Substring_T) List_head(p);
+ if (List_next(p) == endp) {
+ /* Last substring, minus, not hard-clipped */
+ if (hide_soft_clips_p == true) {
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ } else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ }
+
+ if ((genomicdir_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,matchlength,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else if ((genomicdir_refdiff = Substring_genomic_refdiff(substring)) == genomicdir_bothdiff) {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,matchlength,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_refdiff);
+ } else {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,matchlength,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_bothdiff);
+ FREEA(genomicfwd_refdiff);
+ }
+
+ } else {
+ /* Middle substring, minus */
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+
+ if ((genomicdir_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,matchlength,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,lastp);
+ } else if ((genomicdir_refdiff = Substring_genomic_refdiff(substring)) == genomicdir_bothdiff) {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,matchlength,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,lastp);
+ FREEA(genomicfwd_refdiff);
+ } else {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
+ matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,matchlength,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,lastp);
+ FREEA(genomicfwd_bothdiff);
+ FREEA(genomicfwd_refdiff);
+ }
+ }
+ p = List_next(p);
+ }
+
+ if (p != endp) {
+ if (type == DEL_JUNCTION) {
+ deletion_string = Junction_deletion_string(post_junction,genome,/*plusp:true*/true);
+ FPRINTF(fp,"^%s",deletion_string);
+ FREE(deletion_string);
+ }
+
+ /* Last substring, minus, hard-clipped */
+ substring = (Substring_T) List_head(p);
+ if (hide_soft_clips_p == true) {
+ substring_start = Substring_querystart_orig(substring);
+ substring_length = Substring_match_length_orig(substring);
+ } else {
+ substring_start = Substring_querystart(substring);
+ substring_length = Substring_match_length(substring);
+ }
+
+ if ((genomicdir_bothdiff = Substring_genomic_bothdiff(substring)) == NULL) {
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,matchlength,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else if ((genomicdir_refdiff = Substring_genomic_refdiff(substring)) == genomicdir_bothdiff) {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,matchlength,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_refdiff);
+ } else {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
+ /* matchlength = */ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,matchlength,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_bothdiff);
+ FREEA(genomicfwd_refdiff);
+ }
+ }
}
}
+
if (printp == false) {
- fprintf(fp,"0");
+ FPRINTF(fp,"0");
}
/* 12. TAGS: NH */
- fprintf(fp,"\tNH:i:%d",npaths);
-
/* 12. TAGS: HI */
- fprintf(fp,"\tHI:i:%d",pathnum);
-
/* 12. TAGS: NM */
- /* fprintf(fp,"\tNM:i:%d",Substring_nmismatches_refdiff(acceptor)); */
- fprintf(fp,"\tNM:i:%d",nmismatches_refdiff);
+ FPRINTF(fp,"\tNH:i:%d\tHI:i:%d\tNM:i:%d",npaths,pathnum,nmismatches_refdiff + nindels);
if (snps_iit) {
/* 12. TAGS: XW and XV */
- fprintf(fp,"\tXW:i:%d",nmismatches_bothdiff);
- fprintf(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
+ FPRINTF(fp,"\tXW:i:%d",nmismatches_bothdiff);
+ FPRINTF(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
}
/* 12. TAGS: SM */
- fprintf(fp,"\tSM:i:%d",mapq_score);
-
/* 12. TAGS: XQ */
- fprintf(fp,"\tXQ:i:%d",absmq_score);
-
/* 12. TAGS: X2 */
- fprintf(fp,"\tX2:i:%d",second_absmq);
+ FPRINTF(fp,"\tSM:i:%d\tXQ:i:%d\tX2:i:%d",mapq_score,absmq_score,second_absmq);
/* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
+ FPRINTF(fp,"\tXO:Z:%s",abbrev);
/* 12. TAGS: XS */
-#if 0
- /* Not necessary to compute, because already computed by print_exon_exon */
- /* sensedir for chimera must be SENSE_FORWARD or SENSE_ANTI, not SENSE_NULL */
if (sensedir == SENSE_FORWARD) {
if (plusp == true) {
- fprintf(fp,"\tXS:A:+");
+ FPRINTF(fp,"\tXS:A:+");
} else {
- fprintf(fp,"\tXS:A:-");
+ FPRINTF(fp,"\tXS:A:-");
}
} else if (sensedir == SENSE_ANTI) {
if (plusp == true) {
- fprintf(fp,"\tXS:A:-");
+ FPRINTF(fp,"\tXS:A:-");
} else {
- fprintf(fp,"\tXS:A:+");
+ FPRINTF(fp,"\tXS:A:+");
}
+#if 0
+ /* Don't print XS field for SENSE_NULL */
} else if (force_xs_direction_p == true) {
- fprintf(fp,"\tXS:A:+");
+ FPRINTF(fp,"\tXS:A:+");
} else {
- fprintf(fp,"\tXS:A:?");
- }
-#else
- fprintf(fp,"\tXS:A:%c",acceptor_strand);
+ FPRINTF(fp,"\tXS:A:?");
#endif
+ }
- /* 12. TAGS: XA */
- if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
- (end_ambig = Stage3end_end_ambiguous_p(this)) == true) {
- fprintf(fp,"\tXA:Z:");
-
- if (plusp == true) {
- if ((n = Stage3end_start_nambcoords(this)) > 0) {
- assert(sensep == true);
- start_ambcoords = Stage3end_start_ambcoords(this);
-#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(acceptor);
- fprintf(fp,"%u",start_ambcoords[0] - chroffset + 1U);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",start_ambcoords[i] - chroffset + 1U);
- }
-#else
- splicecoord = Substring_alignstart(acceptor);
- fprintf(fp,"%u",splicecoord - start_ambcoords[0]);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",splicecoord - start_ambcoords[i]);
- }
-#endif
- }
- fprintf(fp,"|");
- if ((n = Stage3end_end_nambcoords(this)) > 0) {
- assert(sensep == false);
- end_ambcoords = Stage3end_end_ambcoords(this);
-#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(acceptor);
- fprintf(fp,"%u",end_ambcoords[0] - chroffset + 1U);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",end_ambcoords[i] - chroffset + 1U);
- }
-#else
- splicecoord = Substring_alignend(acceptor);
- fprintf(fp,"%u",end_ambcoords[0] - splicecoord);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",end_ambcoords[i] - splicecoord);
- }
-#endif
- }
-
- } else {
- if ((n = Stage3end_end_nambcoords(this)) > 0) {
- assert(sensep == false);
- end_ambcoords = Stage3end_end_ambcoords(this);
+ /* 12. TAGS: XA */
+ if (prevp == NULL) {
+ /* substringL = (Substring_T) NULL; */
+ ambigL = false;
+ } else {
+ substringL = (Substring_T) List_head(prevp);
+ ambigL = Substring_ambiguous_p(substringL);
+ }
+ if (nextp == NULL) {
+ ambigH = false;
+ } else {
+ substringH = (Substring_T) List_head(nextp);
+ ambigH = Substring_ambiguous_p(substringH);
+ }
+
+ if (ambigL == true || ambigH == true) {
+ FPRINTF(fp,"\tXA:Z:");
+
+ if (ambigL == true) {
+ ambcoords = Substring_ambcoords(substringL);
+ n = Substring_nambcoords(substringL);
#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(acceptor);
- fprintf(fp,"%u",end_ambcoords[0] - chroffset + 1U);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",end_ambcoords[i] - chroffset + 1U);
- }
+ chroffset = Substring_chroffset(substringL);
+ FPRINTF(fp,"%u",ambcoords[0] - chroffset + 1U);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",ambcoords[i] - chroffset + 1U);
+ }
#else
- splicecoord = Substring_alignend(acceptor);
- fprintf(fp,"%u",splicecoord - end_ambcoords[0]);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",splicecoord - end_ambcoords[i]);
- }
-#endif
+ substringM = (Substring_T) List_head(List_next(prevp));
+ if (plusp == true) {
+ splicecoord = Substring_alignstart(substringM);
+ } else {
+ splicecoord = Substring_alignend(substringM);
}
- fprintf(fp,"|");
- if ((n = Stage3end_start_nambcoords(this)) > 0) {
- assert(sensep == true);
- start_ambcoords = Stage3end_start_ambcoords(this);
+ FPRINTF(fp,"%u",splicecoord - ambcoords[0]);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",splicecoord - ambcoords[i]);
+ }
+#endif
+ }
+ FPRINTF(fp,"|");
+ if (ambigH == true) {
+ ambcoords = Substring_ambcoords(substringH);
+ n = Substring_nambcoords(substringH);
#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(acceptor);
- fprintf(fp,"%u",start_ambcoords[0] - chroffset + 1U);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",start_ambcoords[i] - chroffset + 1U);
- }
+ chroffset = Substring_chroffset(substringH);
+ FPRINTF(fp,"%u",ambcoords[0] - chroffset + 1U);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",ambcoords[i] - chroffset + 1U);
+ }
#else
- splicecoord = Substring_alignstart(acceptor);
- fprintf(fp,"%u",start_ambcoords[0] - splicecoord);
- for (i = 1; i < n; i++) {
- fprintf(fp,",%u",start_ambcoords[i] - splicecoord);
- }
-#endif
+ substringM = (Substring_T) List_head(finalp);
+ if (plusp == true) {
+ splicecoord = Substring_alignend(substringM);
+ } else {
+ splicecoord = Substring_alignstart(substringM);
}
+ FPRINTF(fp,"%u",ambcoords[0] - splicecoord);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",ambcoords[i] - splicecoord);
+ }
+#endif
}
}
- /* 12. TAGS: XT */
- if (print_xt_p == true) {
- fprintf(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
- fprintf(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_chrpos,acceptor_strand,acceptor_chr,acceptor_chrpos);
- }
-
/* 12. TAGS: XC */
if (circularp == true) {
- fprintf(fp,"\tXC:A:+");
+ FPRINTF(fp,"\tXC:A:+");
}
/* 12. TAGS: XG */
- if (Stage3end_sarrayp(this) == true) {
- fprintf(fp,"\tXG:Z:A");
+ if (Stage3end_sarrayp(stage3end) == true) {
+ FPRINTF(fp,"\tXG:Z:A");
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
-static bool
-check_cigar_halfacceptor (Substring_T acceptor, int querylength, int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, bool circularp) {
- bool result;
- Intlist_T cigar_types = NULL;
- bool plusp, sensep;
- bool use_hardclip_p = false;
- int transloc_hardclip_low, transloc_hardclip_high;
-
- plusp = Substring_plusp(acceptor);
-
- sensep = Substring_chimera_sensep(acceptor);
-
- if (use_hardclip_p == true) {
- if (sensep != plusp) {
- transloc_hardclip_low = 0;
- if (plusp == true) {
- /* sensep false */
- transloc_hardclip_high = querylength - Substring_queryend(acceptor);
-
- } else {
- /* sensep true */
- transloc_hardclip_high = Substring_querystart(acceptor);
- }
-
- } else { /* sensep == Substring_plusp(acceptor) */
- transloc_hardclip_high = 0;
- if (plusp == true) {
- transloc_hardclip_low = Substring_querystart(acceptor);
-
- } else {
- transloc_hardclip_low = querylength - Substring_queryend(acceptor);
- }
- }
+static void
+halfdonor_dinucleotide (char *donor1, char *donor2, Substring_T donor, int sensedir) {
+ char *genomic;
+ int substring_start, substring_end;
- if (transloc_hardclip_low > hardclip_low) {
- hardclip_low = transloc_hardclip_low;
- }
- if (transloc_hardclip_high > hardclip_high) {
- hardclip_high = transloc_hardclip_high;
- }
+ genomic = Substring_genomic_refdiff(donor);
+ if (sensedir == SENSE_FORWARD) {
+ substring_end = Substring_queryend(donor);
+ *donor1 = toupper(genomic[substring_end]);
+ *donor2 = toupper(genomic[substring_end+1]);
+ } else {
+ substring_start = Substring_querystart(donor);
+ *donor2 = toupper(complCode[(int) genomic[substring_start-2]]);
+ *donor1 = toupper(complCode[(int) genomic[substring_start-1]]);
}
- if (sensep != plusp) {
- if (plusp == true) {
- /* sensep false */
- assert(Substring_chimera_pos(acceptor) == Substring_queryend(acceptor));
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_querystart(acceptor) +
- Substring_match_length(acceptor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(acceptor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- /* sensep true */
- assert(Substring_chimera_pos(acceptor) == Substring_querystart(acceptor));
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- (querylength - Substring_queryend(acceptor)) +
- Substring_match_length(acceptor),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(acceptor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- }
- }
+ return;
+}
- } else { /* sensep == Substring_plusp(acceptor) */
- if (plusp == true) {
- assert(Substring_chimera_pos(acceptor) == Substring_querystart(acceptor));
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(acceptor) +
- (querylength - Substring_queryend(acceptor)),
- /*querypos*/Substring_querystart(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',Substring_querystart(acceptor),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
+static void
+halfacceptor_dinucleotide (char *acceptor2, char *acceptor1, Substring_T acceptor, int sensedir) {
+ char *genomic;
+ int substring_start, substring_end;
- } else {
- assert(Substring_chimera_pos(acceptor) == Substring_queryend(acceptor));
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(acceptor) +
- Substring_querystart(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/use_hardclip_p ? 'H' : 'S',querylength - Substring_queryend(acceptor),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(acceptor),
- /*querypos*/Substring_queryend(acceptor),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(acceptor),
- /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- }
- }
+ genomic = Substring_genomic_refdiff(acceptor);
+ if (sensedir == SENSE_FORWARD) {
+ substring_start = Substring_querystart(acceptor);
+ *acceptor2 = toupper(genomic[substring_start-2]);
+ *acceptor1 = toupper(genomic[substring_start-1]);
+ } else {
+ substring_end = Substring_queryend(acceptor);
+ *acceptor1 = toupper(complCode[(int) genomic[substring_end]]);
+ *acceptor2 = toupper(complCode[(int) genomic[substring_end+1]]);
}
- result = check_cigar_types(cigar_types);
-
- Intlist_free(&cigar_types);
- return result;
+ return;
}
+
static void
-print_localsplice (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
- char *acc1, char *acc2, int pathnum, int npaths,
- int absmq_score, int first_absmq, int second_absmq, int mapq_score,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
- Resulttype_T resulttype, bool first_read_p, int npaths_mate,
- int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
- bool circularp) {
+print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Stage3end_T this, Stage3end_T mate,
+ char *acc1, char *acc2, int pathnum, int npaths, int absmq_score, int first_absmq, int second_absmq, int mapq_score,
+ Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
+ Chrpos_T concordant_chrpos, Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos, Chrpos_T mate_chrpos,
+ int hardclip_low, int hardclip_high, Resulttype_T resulttype, bool first_read_p, int npaths_mate,
+ int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
+ bool use_hardclip_p, bool print_xt_p, int donor_sensedir, char donor_strand, char acceptor_strand,
+ char *donor_chr, char *acceptor_chr, char donor1, char donor2, char acceptor2, char acceptor1,
+ double donor_prob, double acceptor_prob, bool circularp) {
unsigned int flag = 0U;
- Substring_T substring1, substring2;
int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
- int sensedir;
bool sensep;
char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
- int substring1_start, substring2_start, substring1_length, substring2_length, matchlength;
+ int substring_start, substring_length;
+ int transloc_hardclip_low, transloc_hardclip_high;
bool plusp, printp;
+ bool start_ambig, end_ambig;
+ int n, i;
+ Univcoord_T *start_ambcoords, *end_ambcoords, splicecoord;
+#ifdef PRINT_AMBIG_COORDS
+ Univcoord_T chroffset;
+#endif
- querylength = Shortread_fulllength(queryseq);
- plusp = Stage3end_plusp(this);
- if ((sensedir = Stage3end_sensedir(this)) == SENSE_NULL) {
- sensedir = Stage3end_sensedir(mate);
- }
- sensep = (sensedir == SENSE_FORWARD);
+ querylength = Shortread_fulllength(queryseq);
+ plusp = Substring_plusp(donor);
/* 1. QNAME */
if (acc2 == NULL) {
- fprintf(fp,"%s",acc1);
+ FPRINTF(fp,"%s",acc1);
} else {
- fprintf(fp,"%s,%s",acc1,acc2);
+ FPRINTF(fp,"%s,%s",acc1,acc2);
}
/* 2. FLAG */
flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
pathnum,npaths,npaths_mate,absmq_score,first_absmq,
invertp,invert_mate_p);
- fprintf(fp,"\t%u",flag);
+ FPRINTF(fp,"\t%u",flag);
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(this),chrpos,Stage3end_chrlength(this),chromosome_iit);
-
+ print_chromosomal_pos(fp,Substring_chrnum(donor),donor_chrpos,Substring_chrlength(donor),chromosome_iit);
+
/* 5. MAPQ: Mapping quality */
- fprintf(fp,"\t%d",mapq_score);
+ FPRINTF(fp,"\t%d",mapq_score);
/* 6. CIGAR */
- fprintf(fp,"\t");
- if (sensep == plusp) {
- substring1 = /* donor */ Stage3end_substring_donor(this);
- substring2 = /* acceptor */ Stage3end_substring_acceptor(this);
+ FPRINTF(fp,"\t");
+ if (Stage3end_sensedir(this) == SENSE_ANTI) {
+ sensep = false;
} else {
- substring1 = /* acceptor */ Stage3end_substring_acceptor(this);
- substring2 = /* donor */ Stage3end_substring_donor(this);
+ sensep = true;
}
- if (plusp == true) {
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_querystart(substring1) +
- Substring_match_length(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- if (hardclip_low < Substring_queryend(substring1) &&
- querylength - hardclip_high > Substring_querystart(substring2)) {
- debug1(printf("\ncase 1: hardclip_low %d < queryend(substring1) %d && querylength %d - hardclip_high %d > querystart(substring2) %d\n",
- hardclip_low,Substring_queryend(substring1),querylength,hardclip_high,Substring_querystart(substring2)));
- fprintf(fp,"%uN",Stage3end_distance(this));
+ if (use_hardclip_p == true) {
+ if (sensep == true) {
+ if (plusp == true) {
+ transloc_hardclip_low = 0;
+ transloc_hardclip_high = querylength - Substring_queryend(donor);
+ } else {
+ transloc_hardclip_high = 0;
+ transloc_hardclip_low = querylength - Substring_queryend(donor);
+ }
+
+ } else {
+ if (plusp == true) {
+ transloc_hardclip_high = 0;
+ transloc_hardclip_low = Substring_querystart(donor);
+ } else {
+ transloc_hardclip_low = 0;
+ transloc_hardclip_high = Substring_querystart(donor);
+ }
+ }
+
+ if (transloc_hardclip_low > hardclip_low) {
+ hardclip_low = transloc_hardclip_low;
+ }
+ if (transloc_hardclip_high > hardclip_high) {
+ hardclip_high = transloc_hardclip_high;
+ }
+ }
+
+
+ if (sensep == true) {
+ assert(Substring_chimera_pos(donor) == Substring_queryend(donor));
+ if (plusp == true) {
+ /* sensep true, plusp true */
+ /* FPRINTF(fp,"donor sensep true, plusp true\n"); */
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',
+ Substring_querystart(donor) +
+ Substring_match_length(donor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
+ /*trimlength*/Substring_trim_right(donor));
+
+ } else {
+ print_cigar(fp,/*type*/'S',Substring_querystart(donor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'M',Substring_match_length(donor),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,
+ /*trimlength*/0);
+ print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
+ /*trimlength*/Substring_trim_right(donor));
}
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring2) +
- (querylength - Substring_queryend(substring2)),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+
} else {
- print_cigar(fp,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- if (hardclip_low < Substring_queryend(substring1) &&
- querylength - hardclip_high > Substring_querystart(substring2)) {
- debug1(printf("\ncase 1: hardclip_low %d < queryend(substring1) %d && querylength %d - hardclip_high %d > querystart(substring2) %d\n",
- hardclip_low,Substring_queryend(substring1),querylength,hardclip_high,Substring_querystart(substring2)));
- fprintf(fp,"%uN",Stage3end_distance(this));
+ /* sensep true, plusp false */
+ /* FPRINTF(fp,"donor sensep false, plusp false\n"); */
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/Substring_trim_right(donor));
+ print_cigar(fp,/*type*/'M',
+ Substring_match_length(donor) +
+ Substring_querystart(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,
+ /*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/Substring_trim_right(donor));
+ print_cigar(fp,/*type*/'M',Substring_match_length(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/0);
+ print_cigar(fp,/*type*/'S',Substring_querystart(donor),
+ /*querypos*/Substring_querystart(donor),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true,/*trimlength*/0);
}
- print_cigar(fp,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
}
} else {
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- (querylength - Substring_queryend(substring1)) +
- Substring_match_length(substring1),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- if (querylength - hardclip_low > Substring_queryend(substring2) &&
- hardclip_high < Substring_querystart(substring1)) {
- debug1(printf("\ncase 2: querylength %d - hardclip_low %d > queryend(substring2) %d && hardclip_high %d < querystart(substring1) %d\n",
- querylength,hardclip_low,Substring_queryend(substring2),hardclip_high,Substring_querystart(substring1)));
- fprintf(fp,"%uN",Stage3end_distance(this));
+ assert(Substring_chimera_pos(donor) == Substring_querystart(donor));
+ if (plusp == true) {
+ /* sensep false, plusp true */
+ /* FPRINTF(fp,"donor sensep false, plusp true\n"); */
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'E',Substring_querystart(donor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(donor));
+ print_cigar(fp,/*type*/'M',Substring_match_length(donor) + (querylength - Substring_queryend(donor)),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
+ /*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'E',Substring_querystart(donor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(donor));
+ print_cigar(fp,/*type*/'M',Substring_match_length(donor),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,
+ /*trimlength*/0);
+ print_cigar(fp,/*type*/'S',querylength - Substring_queryend(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
+ /*trimlength*/0);
}
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring2) +
- Substring_querystart(substring2),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+
} else {
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring1),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- if (querylength - hardclip_low > Substring_queryend(substring2) &&
- hardclip_high < Substring_querystart(substring1)) {
- debug1(printf("\ncase 2: querylength %d - hardclip_low %d > queryend(substring2) %d && hardclip_high %d < querystart(substring1) %d\n",
- querylength,hardclip_low,Substring_queryend(substring2),hardclip_high,Substring_querystart(substring1)));
- fprintf(fp,"%uN",Stage3end_distance(this));
+ /* sensep false, plusp false */
+ /* FPRINTF(fp,"donor sensep true, plusp false\n"); */
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',(querylength - Substring_queryend(donor)) + Substring_match_length(donor),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'E',Substring_querystart(donor),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,
+ /*trimlength*/Substring_trim_left(donor));
+
+ } else {
+ print_cigar(fp,/*type*/'S',querylength - Substring_queryend(donor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/0);
+ print_cigar(fp,/*type*/'M',Substring_match_length(donor),
+ /*querypos*/Substring_queryend(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/0);
+ print_cigar(fp,/*type*/'E',Substring_querystart(donor),
+ /*querypos*/Substring_querystart(donor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,
+ /*trimlength*/Substring_trim_left(donor));
}
- print_cigar(fp,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'S',Substring_querystart(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
}
}
-
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
+ /* For anchor_chrnum, previously used Stage3end_chrnum(this), but this is 0 */
print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
mate_chrpos,Stage3end_chrlength(mate),
- Stage3end_chrnum(this),chrpos,chromosome_iit);
+ /*anchor_chrnum*/Substring_chrnum(donor),donor_chrpos,chromosome_iit);
/* 9. ISIZE: Insert size */
if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
if (plusp == invertp) {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",-pairedlength);
} else {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
}
} else if (mate_chrpos == 0) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos < mate_chrpos) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos > mate_chrpos) {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
+#if 0
+ } else if (concordant_chrpos < mate_chrpos) {
+ FPRINTF(fp,"\t%d",pairedlength);
+ } else if (concordant_chrpos > mate_chrpos) {
+ FPRINTF(fp,"\t%d",-pairedlength);
+#endif
} else if (first_read_p == true) {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
} else {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",-pairedlength);
}
/* 10. SEQ: queryseq and 11. QUAL: quality scores */
/* Queryseq has already been inverted, so just measure plusp relative to its current state */
- fprintf(fp,"\t");
if (plusp == true) {
- Shortread_print_chopped(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
+ Shortread_print_chopped_sam(fp,queryseq,hardclip_low,hardclip_high);
+ FPRINTF(fp,"\t");
Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
+ quality_shift,/*show_chopped_p*/false);
} else {
- Shortread_print_chopped_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
+ Shortread_print_chopped_revcomp_sam(fp,queryseq,hardclip_low,hardclip_high);
+ FPRINTF(fp,"\t");
Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- }
+ quality_shift,/*show_chopped_p*/false);
+ }
+
/* 12. TAGS: RG */
if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
+ FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
}
- /* 12. TAGS: XH */
+ /* 12. TAGS: XH and XI */
if (hardclip_low > 0 || hardclip_high > 0) {
- fprintf(fp,"\tXH:Z:");
+ FPRINTF(fp,"\tXH:Z:");
if (plusp == true) {
Shortread_print_chopped_end(fp,queryseq,hardclip_low,hardclip_high);
} else {
Shortread_print_chopped_end_revcomp(fp,queryseq,hardclip_low,hardclip_high);
}
+
+ if (Shortread_quality_string(queryseq) != NULL) {
+ FPRINTF(fp,"\tXI:Z:");
+ if (plusp == true) {
+ Shortread_print_chopped_end_quality(fp,queryseq,hardclip_low,hardclip_high);
+ } else {
+ Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_low,hardclip_high);
+ }
+ }
}
/* 12. TAGS: XB */
@@ -4496,526 +2430,503 @@ print_localsplice (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
Shortread_print_chop(fp,queryseq,invertp);
/* 12. TAGS: MD */
- fprintf(fp,"\tMD:Z:");
+ FPRINTF(fp,"\tMD:Z:");
printp = false;
if (hide_soft_clips_p == true) {
- substring1_start = Substring_querystart_orig(substring1);
- substring1_length = Substring_match_length_orig(substring1);
- substring2_start = Substring_querystart_orig(substring2);
- substring2_length = Substring_match_length_orig(substring2);
- } else {
- substring1_start = Substring_querystart(substring1);
- substring1_length = Substring_match_length(substring1);
- substring2_start = Substring_querystart(substring2);
- substring2_length = Substring_match_length(substring2);
- }
-
- if (plusp == true) {
- genomicfwd_refdiff = Substring_genomic_refdiff(substring1);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(substring1);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicfwd_refdiff[substring1_start]),&(genomicfwd_bothdiff[substring1_start]),
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
-
-#if 0
- /* Intron: Gets skipped in MD string */
- fprintf(fp,"^");
-#endif
-
- genomicfwd_refdiff = Substring_genomic_refdiff(substring2);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(substring2);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
- &(genomicfwd_refdiff[substring2_start]),&(genomicfwd_bothdiff[substring2_start]),
- substring2_length,/*querypos*/substring2_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
-
- } else {
- genomicdir_refdiff = Substring_genomic_refdiff(substring1);
- genomicdir_bothdiff = Substring_genomic_bothdiff(substring1);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring1_start]),substring1_length);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- FREEA(genomicfwd_refdiff);
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring1_start]),substring1_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring1_start]),substring1_length);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
- }
-
-#if 0
- /* Intron: Gets skipped in MD string */
- fprintf(fp,"^");
-#endif
+ substring_start = Substring_querystart_orig(donor);
+ substring_length = Substring_match_length_orig(donor);
+ } else {
+ substring_start = Substring_querystart(donor);
+ substring_length = Substring_match_length(donor);
+ }
- genomicdir_refdiff = Substring_genomic_refdiff(substring2);
- genomicdir_bothdiff = Substring_genomic_bothdiff(substring2);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring2_start]),substring2_length);
+ if (use_hardclip_p == false) {
+ genomicdir_refdiff = Substring_genomic_refdiff(donor);
+ genomicdir_bothdiff = Substring_genomic_bothdiff(donor);
+ if (plusp == true) {
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ &(genomicdir_refdiff[substring_start]),&(genomicdir_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else if (genomicdir_bothdiff == genomicdir_refdiff) {
+ genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring2_length,/*querypos*/substring2_start,querylength,
+ fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
FREEA(genomicfwd_refdiff);
} else {
- genomicfwd_refdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring2_start]),substring2_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring2_start]),substring2_length);
+ genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
+ genomicfwd_bothdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring2_length,/*querypos*/substring2_start,querylength,
+ fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
FREEA(genomicfwd_bothdiff);
FREEA(genomicfwd_refdiff);
}
+
+ } else if (sensep == true) {
+ if (plusp == true) {
+ genomicfwd_refdiff = Substring_genomic_refdiff(donor);
+ genomicfwd_bothdiff = Substring_genomic_bothdiff(donor);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ genomicdir_refdiff = Substring_genomic_refdiff(donor);
+ genomicdir_bothdiff = Substring_genomic_bothdiff(donor);
+ if (genomicdir_bothdiff == genomicdir_refdiff) {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_refdiff);
+ } else {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_bothdiff);
+ FREEA(genomicfwd_refdiff);
+ }
+ }
+
+ } else { /* sensep == false */
+ if (plusp == true) {
+ genomicfwd_refdiff = Substring_genomic_refdiff(donor);
+ genomicfwd_bothdiff = Substring_genomic_bothdiff(donor);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else {
+ genomicdir_refdiff = Substring_genomic_refdiff(donor);
+ genomicdir_bothdiff = Substring_genomic_refdiff(donor);
+ if (genomicdir_bothdiff == genomicdir_refdiff) {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_refdiff);
+ } else {
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_bothdiff);
+ FREEA(genomicfwd_refdiff);
+ }
+ }
}
if (printp == false) {
- fprintf(fp,"0");
+ FPRINTF(fp,"0");
}
/* 12. TAGS: NH */
- fprintf(fp,"\tNH:i:%d",npaths);
-
/* 12. TAGS: HI */
- fprintf(fp,"\tHI:i:%d",pathnum);
-
/* 12. TAGS: NM */
- /* fprintf(fp,"\tNM:i:%d",Stage3end_nmismatches_refdiff(this)); */
- fprintf(fp,"\tNM:i:%d",nmismatches_refdiff);
+ FPRINTF(fp,"\tNH:i:%d\tHI:i:%d\tNM:i:%d",npaths,pathnum,nmismatches_refdiff);
if (snps_iit) {
/* 12. TAGS: XW and XV */
- fprintf(fp,"\tXW:i:%d",nmismatches_bothdiff);
- fprintf(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
+ FPRINTF(fp,"\tXW:i:%d",nmismatches_bothdiff);
+ FPRINTF(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
}
/* 12. TAGS: SM */
- fprintf(fp,"\tSM:i:%d",mapq_score);
-
/* 12. TAGS: XQ */
- fprintf(fp,"\tXQ:i:%d",absmq_score);
-
/* 12. TAGS: X2 */
- fprintf(fp,"\tX2:i:%d",second_absmq);
+ FPRINTF(fp,"\tSM:i:%d\tXQ:i:%d\tX2:i:%d",mapq_score,absmq_score,second_absmq);
/* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
+ FPRINTF(fp,"\tXO:Z:%s",abbrev);
/* 12. TAGS: XS */
- if (sensedir == SENSE_FORWARD) {
- if (plusp == true) {
- fprintf(fp,"\tXS:A:+");
- } else {
- fprintf(fp,"\tXS:A:-");
- }
- } else if (sensedir == SENSE_ANTI) {
+ assert(donor_sensedir != SENSE_NULL);
+ FPRINTF(fp,"\tXS:A:%c",donor_strand);
+
+ /* 12. TAGS: XA */
+ if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
+ (end_ambig = Stage3end_end_ambiguous_p(this)) == true) {
+ FPRINTF(fp,"\tXA:Z:");
+
if (plusp == true) {
- fprintf(fp,"\tXS:A:-");
+ if ((n = Stage3end_start_nambcoords(this)) > 0) {
+ assert(sensep == false);
+ start_ambcoords = Stage3end_start_ambcoords(this);
+ splicecoord = Substring_alignstart(donor);
+#ifdef PRINT_AMBIG_COORDS
+ chroffset = Substring_chroffset(donor);
+ FPRINTF(fp,"%u",start_ambcoords[0] - chroffset + 1U);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",start_ambcoords[i] - chroffset + 1U);
+ }
+#else
+ splicecoord = Substring_alignstart(donor);
+ FPRINTF(fp,"%u",splicecoord - start_ambcoords[0]);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",splicecoord - start_ambcoords[i]);
+ }
+#endif
+ }
+ FPRINTF(fp,"|");
+ if ((n = Stage3end_end_nambcoords(this)) > 0) {
+ assert(sensep == true);
+ end_ambcoords = Stage3end_end_ambcoords(this);
+#ifdef PRINT_AMBIG_COORDS
+ chroffset = Substring_chroffset(donor);
+ FPRINTF(fp,"%u",end_ambcoords[0] - chroffset + 1U);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",end_ambcoords[i] - chroffset + 1U);
+ }
+#else
+ splicecoord = Substring_alignend(donor);
+ FPRINTF(fp,"%u",end_ambcoords[0] - splicecoord);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",end_ambcoords[i] - splicecoord);
+ }
+#endif
+ }
+
} else {
- fprintf(fp,"\tXS:A:+");
+ if ((n = Stage3end_end_nambcoords(this)) > 0) {
+ assert(sensep == true);
+ end_ambcoords = Stage3end_end_ambcoords(this);
+#ifdef PRINT_AMBIG_COORDS
+ chroffset = Substring_chroffset(donor);
+ FPRINTF(fp,"%u",end_ambcoords[0] - chroffset + 1U);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",end_ambcoords[i] - chroffset + 1U);
+ }
+#else
+ splicecoord = Substring_alignend(donor);
+ FPRINTF(fp,"%u",splicecoord - end_ambcoords[0]);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",splicecoord - end_ambcoords[i]);
+ }
+#endif
+ }
+ FPRINTF(fp,"|");
+ if ((n = Stage3end_start_nambcoords(this)) > 0) {
+ assert(sensep == false);
+ start_ambcoords = Stage3end_start_ambcoords(this);
+#ifdef PRINT_AMBIG_COORDS
+ chroffset = Substring_chroffset(donor);
+ FPRINTF(fp,"%u",start_ambcoords[0] - chroffset + 1U);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",start_ambcoords[i] - chroffset + 1U);
+ }
+#else
+ splicecoord = Substring_alignstart(donor);
+ FPRINTF(fp,"%u",start_ambcoords[0] - splicecoord);
+ for (i = 1; i < n; i++) {
+ FPRINTF(fp,",%u",start_ambcoords[i] - splicecoord);
+ }
+#endif
+ }
}
- } else if (force_xs_direction_p == true) {
- fprintf(fp,"\tXS:A:+");
- } else {
- fprintf(fp,"\tXS:A:?");
}
- /* 12. TAGS: XA */
- assert(Stage3end_start_ambiguous_p(this) == false);
- assert(Stage3end_end_ambiguous_p(this) == false);
+ /* 12. TAGS: XT */
+ if (print_xt_p == true) {
+ FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
+ FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_chrpos,acceptor_strand,acceptor_chr,acceptor_chrpos);
+ }
/* 12. TAGS: XC */
if (circularp == true) {
- fprintf(fp,"\tXC:A:+");
+ FPRINTF(fp,"\tXC:A:+");
}
/* 12. TAGS: XG */
if (Stage3end_sarrayp(this) == true) {
- fprintf(fp,"\tXG:Z:A");
+ FPRINTF(fp,"\tXG:Z:A");
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
-static bool
-check_cigar_localsplice (Stage3end_T this, Stage3end_T mate, int querylength, int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, bool circularp) {
- bool result;
- Intlist_T cigar_types = NULL;
- Substring_T substring1, substring2;
- bool plusp, sensep;
- int sensedir;
-
- plusp = Stage3end_plusp(this);
-
- if ((sensedir = Stage3end_sensedir(this)) == SENSE_NULL) {
- sensedir = Stage3end_sensedir(mate);
- }
- sensep = (sensedir == SENSE_FORWARD);
-
- if (sensep == plusp) {
- substring1 = /* donor */ Stage3end_substring_donor(this);
- substring2 = /* acceptor */ Stage3end_substring_acceptor(this);
- } else {
- substring1 = /* acceptor */ Stage3end_substring_acceptor(this);
- substring2 = /* donor */ Stage3end_substring_donor(this);
- }
-
- if (plusp == true) {
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_querystart(substring1) +
- Substring_match_length(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- if (hardclip_low < Substring_queryend(substring1) &&
- querylength - hardclip_high > Substring_querystart(substring2)) {
- debug1(printf("\ncase 1: hardclip_low %d < queryend(substring1) %d && querylength %d - hardclip_high %d > querystart(substring2) %d\n",
- hardclip_low,Substring_queryend(substring1),querylength,hardclip_high,Substring_querystart(substring2)));
- cigar_types = Intlist_push(cigar_types,'N');
- }
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(substring2) +
- (querylength - Substring_queryend(substring2)),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- if (hardclip_low < Substring_queryend(substring1) &&
- querylength - hardclip_high > Substring_querystart(substring2)) {
- debug1(printf("\ncase 1: hardclip_low %d < queryend(substring1) %d && querylength %d - hardclip_high %d > querystart(substring2) %d\n",
- hardclip_low,Substring_queryend(substring1),querylength,hardclip_high,Substring_querystart(substring2)));
- cigar_types = Intlist_push(cigar_types,'N');
- }
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
-
- } else {
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- (querylength - Substring_queryend(substring1)) +
- Substring_match_length(substring1),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- if (querylength - hardclip_low > Substring_queryend(substring2) &&
- hardclip_high < Substring_querystart(substring1)) {
- debug1(printf("\ncase 2: querylength %d - hardclip_low %d > queryend(substring2) %d && hardclip_high %d < querystart(substring1) %d\n",
- querylength,hardclip_low,Substring_queryend(substring2),hardclip_high,Substring_querystart(substring1)));
- cigar_types = Intlist_push(cigar_types,'N');
- }
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(substring2) +
- Substring_querystart(substring2),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring1),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- if (querylength - hardclip_low > Substring_queryend(substring2) &&
- hardclip_high < Substring_querystart(substring1)) {
- debug1(printf("\ncase 2: querylength %d - hardclip_low %d > queryend(substring2) %d && hardclip_high %d < querystart(substring1) %d\n",
- querylength,hardclip_low,Substring_queryend(substring2),hardclip_high,Substring_querystart(substring1)));
- cigar_types = Intlist_push(cigar_types,'N');
- }
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- }
- }
-
- result = check_cigar_types(cigar_types);
-
- Intlist_free(&cigar_types);
- return result;
-}
-
static void
-print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate,
- char *acc1, char *acc2, int pathnum, int npaths,
- int absmq_score, int first_absmq, int second_absmq, int mapq_score,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T chrpos, Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
- Resulttype_T resulttype, bool first_read_p, int npaths_mate,
- int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
- bool circularp) {
+print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
+ char *acc1, char *acc2, int pathnum, int npaths, int absmq_score, int first_absmq, int second_absmq, int mapq_score,
+ Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
+ Chrpos_T concordant_chrpos, Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos, Chrpos_T mate_chrpos,
+ int hardclip_low, int hardclip_high, Resulttype_T resulttype, bool first_read_p, int npaths_mate,
+ int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
+ bool use_hardclip_p, bool print_xt_p, int acceptor_sensedir, char donor_strand, char acceptor_strand,
+ char *donor_chr, char *acceptor_chr, char donor1, char donor2, char acceptor2, char acceptor1,
+ double donor_prob, double acceptor_prob, bool circularp) {
unsigned int flag = 0U;
- /* substring1 is low coordinate on genome, substring2 is high */
- Substring_T substring1, substring2, substringM;
- Chrpos_T distance1, distance2;
int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
- int sensedir;
bool sensep;
char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
- int substring1_start, substring2_start, substringM_start,
- substring1_length, substring2_length, substringM_length, matchlength;
+ int substring_start, substring_length;
+ int transloc_hardclip_low, transloc_hardclip_high;
bool plusp, printp;
bool start_ambig, end_ambig;
- int amb_length_start, amb_length_end;
int n, i;
Univcoord_T *start_ambcoords, *end_ambcoords, splicecoord;
#ifdef PRINT_AMBIG_COORDS
Univcoord_T chroffset;
#endif
-
- querylength = Shortread_fulllength(queryseq);
- plusp = Stage3end_plusp(shortexon);
- if ((sensedir = Stage3end_sensedir(shortexon)) == SENSE_NULL) {
- sensedir = Stage3end_sensedir(mate);
- }
- sensep = (sensedir == SENSE_FORWARD);
+ querylength = Shortread_fulllength(queryseq);
+ plusp = Substring_plusp(acceptor);
/* 1. QNAME */
if (acc2 == NULL) {
- fprintf(fp,"%s",acc1);
+ FPRINTF(fp,"%s",acc1);
} else {
- fprintf(fp,"%s,%s",acc1,acc2);
+ FPRINTF(fp,"%s,%s",acc1,acc2);
}
/* 2. FLAG */
flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
pathnum,npaths,npaths_mate,absmq_score,first_absmq,
invertp,invert_mate_p);
- fprintf(fp,"\t%u",flag);
+ FPRINTF(fp,"\t%u",flag);
/* 3. RNAME: chr */
/* 4. POS: chrpos */
- print_chromosomal_pos(fp,Stage3end_chrnum(shortexon),chrpos,Stage3end_chrlength(shortexon),chromosome_iit);
+ print_chromosomal_pos(fp,Substring_chrnum(acceptor),acceptor_chrpos,Substring_chrlength(acceptor),chromosome_iit);
/* 5. MAPQ: Mapping quality */
- fprintf(fp,"\t%d",mapq_score);
+ FPRINTF(fp,"\t%d",mapq_score);
/* 6. CIGAR */
- fprintf(fp,"\t");
- substringM = Stage3end_substring1(shortexon);
-
- if (sensep == plusp) {
- substring1 = /* donor */ Stage3end_substringD(shortexon);
- distance1 = Stage3end_shortexonA_distance(shortexon);
- distance2 = Stage3end_shortexonD_distance(shortexon);
- substring2 = /* acceptor */ Stage3end_substringA(shortexon);
+ FPRINTF(fp,"\t");
+ if (Stage3end_sensedir(this) == SENSE_ANTI) {
+ sensep = false;
} else {
- substring1 = /* acceptor */ Stage3end_substringA(shortexon);
- distance1 = Stage3end_shortexonD_distance(shortexon);
- distance2 = Stage3end_shortexonA_distance(shortexon);
- substring2 = /* donor */ Stage3end_substringD(shortexon);
+ sensep = true;
}
- if (substring1 == NULL) {
- if (plusp == true) {
- print_cigar(fp,/*type*/'S',Substring_querystart(substringM),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- } else {
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substringM),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- }
+ if (use_hardclip_p == true) {
+ if (sensep == true) {
+ if (plusp == true) {
+ transloc_hardclip_high = 0;
+ transloc_hardclip_low = Substring_querystart(acceptor);
+ } else {
+ transloc_hardclip_low = 0;
+ transloc_hardclip_high = Substring_querystart(acceptor);
+ }
- } else if (plusp == true) {
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_querystart(substring1) +
- Substring_match_length(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
} else {
- print_cigar(fp,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- }
- if (hardclip_low < Substring_queryend(substring1) &&
- querylength - hardclip_high > Substring_querystart(substringM)) {
- debug1(printf("\ncase 3: hardclip_low %d < queryend(substring1) %d && querylength %d - hardclip_high %d > querystart(substringM) %d\n",
- hardclip_low,Substring_queryend(substring1),querylength,hardclip_high,Substring_querystart(substringM)));
- fprintf(fp,"%uN",distance1);
+ if (plusp == true) {
+ transloc_hardclip_low = 0;
+ transloc_hardclip_high = querylength - Substring_queryend(acceptor);
+ } else {
+ transloc_hardclip_high = 0;
+ transloc_hardclip_low = querylength - Substring_queryend(acceptor);
+ }
}
- } else {
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- (querylength - Substring_queryend(substring1)) +
- Substring_match_length(substring1),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- } else {
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring1),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ if (transloc_hardclip_low > hardclip_low) {
+ hardclip_low = transloc_hardclip_low;
}
- if (querylength - hardclip_low > Substring_queryend(substringM) &&
- hardclip_high < Substring_querystart(substring1)) {
- debug1(printf("\ncase 4: querylength %d - hardclip_low %d > queryend(substringM) %d && hardclip_high %d < querystart(substring1) %d\n",
- querylength,hardclip_low,Substring_queryend(substringM),hardclip_high,Substring_querystart(substring1)));
- fprintf(fp,"%uN",distance1);
+ if (transloc_hardclip_high > hardclip_high) {
+ hardclip_high = transloc_hardclip_high;
}
}
- if (plusp == true) {
- print_cigar(fp,/*type*/'M',Substring_match_length(substringM),
- /*querypos*/Substring_querystart(substringM),querylength,
- hardclip_low,hardclip_high,plusp,/*lastp*/false);
- } else {
- print_cigar(fp,/*type*/'M',Substring_match_length(substringM),
- /*querypos*/Substring_queryend(substringM),querylength,
- hardclip_low,hardclip_high,plusp,/*lastp*/false);
- }
- if (substring2 == NULL) {
+ if (sensep == true) {
+ assert(Substring_chimera_pos(acceptor) == Substring_querystart(acceptor));
if (plusp == true) {
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substringM),
- /*querypos*/Substring_queryend(substringM),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- print_cigar(fp,/*type*/'S',Substring_querystart(substringM),
- /*querypos*/Substring_querystart(substringM),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
+ /* sensep true, plusp true */
+ /* FPRINTF(fp,"acceptor sensep true, plusp true\n"); */
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',Substring_querystart(acceptor) + Substring_match_length(acceptor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
+ /*trimlength*/Substring_trim_right(acceptor));
+ } else {
+ print_cigar(fp,/*type*/'S',Substring_querystart(acceptor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
+ /*trimlength*/Substring_trim_right(acceptor));
+ }
- } else if (plusp == true) {
- if (hardclip_low < Substring_queryend(substringM) &&
- querylength - hardclip_high > Substring_querystart(substring2)) {
- debug1(printf("\ncase 5: hardclip_low %d < queryend(substringM) %d && querylength %d - hardclip_high %d > querystart(substring2) %d\n",
- hardclip_low,Substring_queryend(substringM),querylength,hardclip_high,Substring_querystart(substring2)));
- fprintf(fp,"%uN",distance2);
- }
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring2) +
- (querylength - Substring_queryend(substring2)),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
} else {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ /* sensep true, plusp false */
+ /* FPRINTF(fp,"acceptor sensep true, plusp false\n"); */
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/Substring_trim_right(acceptor));
+ print_cigar(fp,/*type*/'M',Substring_match_length(acceptor) + Substring_querystart(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,
+ /*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/Substring_trim_right(acceptor));
+ print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/0);
+ print_cigar(fp,/*type*/'S',Substring_querystart(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true,/*trimlength*/0);
+ }
}
} else {
- if (querylength - hardclip_low > Substring_queryend(substring2) &&
- hardclip_high < Substring_querystart(substringM)) {
- debug1(printf("\ncase 6: querylength %d - hardclip_low %d > queryend(substring2) %d && hardclip_high %d < querystart(substringM) %d\n",
- querylength,hardclip_low,Substring_queryend(substring2),querylength,Substring_querystart(substringM)));
- fprintf(fp,"%uN",distance2);
- }
- if (hide_soft_clips_p == true) {
- print_cigar(fp,/*type*/'M',
- Substring_match_length(substring2) +
- Substring_querystart(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ /* sensep false, plusp true */
+ assert(Substring_chimera_pos(acceptor) == Substring_queryend(acceptor));
+ if (plusp == true) {
+ /* FPRINTF(fp,"acceptor sensep false, plusp true\n"); */
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(acceptor));
+ print_cigar(fp,/*type*/'M',Substring_match_length(acceptor) + (querylength - Substring_queryend(acceptor)),
+ /*querypos*/Substring_querystart(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
+ /*trimlength*/0);
+ } else {
+ print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
+ /*querypos*/0,querylength,hardclip_low,hardclip_high,
+ /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(acceptor));
+ print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'S',querylength - Substring_queryend(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
+ /*trimlength*/Substring_trim_right(acceptor));
+ }
+
} else {
- print_cigar(fp,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- print_cigar(fp,/*type*/'S',Substring_querystart(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
+ /* sensep false, plusp false */
+ /* FPRINTF(fp,"acceptor sensep false, plusp false\n"); */
+ if (hide_soft_clips_p == true) {
+ print_cigar(fp,/*type*/'M',(querylength - Substring_queryend(acceptor)) + Substring_match_length(acceptor),
+ /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/false,/*trimlength*/0);
+ print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true,/*trimlength*/Substring_trim_left(acceptor));
+ } else {
+ print_cigar(fp,/*type*/'S',querylength - Substring_queryend(acceptor),
+ /*querypos*/querylength,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/0);
+ print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
+ /*querypos*/Substring_queryend(acceptor),querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
+ /*trimlength*/0);
+ print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
+ /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
+ /*plusp*/false,/*lastp*/true,/*trimlength*/Substring_trim_left(acceptor));
+ }
}
}
/* 7. MRNM: Mate chr */
/* 8. MPOS: Mate chrpos */
+ /* For anchor_chrnum, previously used Stage3end_chrnum(this), but this is 0 */
print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
mate_chrpos,Stage3end_chrlength(mate),
- Stage3end_chrnum(shortexon),chrpos,chromosome_iit);
+ /*anchor_chrnum*/Substring_chrnum(acceptor),acceptor_chrpos,chromosome_iit);
/* 9. ISIZE: Insert size */
if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
if (plusp == invertp) {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",-pairedlength);
} else {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
}
} else if (mate_chrpos == 0) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos < mate_chrpos) {
- fprintf(fp,"\t%d",pairedlength);
- } else if (chrpos > mate_chrpos) {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
+#if 0
+ } else if (concordant_chrpos < mate_chrpos) {
+ FPRINTF(fp,"\t%d",pairedlength);
+ } else if (concordant_chrpos > mate_chrpos) {
+ FPRINTF(fp,"\t%d",-pairedlength);
+#endif
} else if (first_read_p == true) {
- fprintf(fp,"\t%d",pairedlength);
+ FPRINTF(fp,"\t%d",pairedlength);
} else {
- fprintf(fp,"\t%d",-pairedlength);
+ FPRINTF(fp,"\t%d",-pairedlength);
}
/* 10. SEQ: queryseq and 11. QUAL: quality scores */
/* Queryseq has already been inverted, so just measure plusp relative to its current state */
- fprintf(fp,"\t");
if (plusp == true) {
- Shortread_print_chopped(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
+ Shortread_print_chopped_sam(fp,queryseq,hardclip_low,hardclip_high);
+ FPRINTF(fp,"\t");
Shortread_print_quality(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
+ quality_shift,/*show_chopped_p*/false);
} else {
- Shortread_print_chopped_revcomp(fp,queryseq,hardclip_low,hardclip_high);
- fprintf(fp,"\t");
+ Shortread_print_chopped_revcomp_sam(fp,queryseq,hardclip_low,hardclip_high);
+ FPRINTF(fp,"\t");
Shortread_print_quality_revcomp(fp,queryseq,hardclip_low,hardclip_high,
- quality_shift,/*show_chopped_p*/false);
- }
+ quality_shift,/*show_chopped_p*/false);
+ }
+
/* 12. TAGS: RG */
if (sam_read_group_id != NULL) {
- fprintf(fp,"\tRG:Z:%s",sam_read_group_id);
+ FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
}
- /* 12. TAGS: XH */
+ /* 12. TAGS: XH and XI */
if (hardclip_low > 0 || hardclip_high > 0) {
- fprintf(fp,"\tXH:Z:");
+ FPRINTF(fp,"\tXH:Z:");
if (plusp == true) {
Shortread_print_chopped_end(fp,queryseq,hardclip_low,hardclip_high);
} else {
Shortread_print_chopped_end_revcomp(fp,queryseq,hardclip_low,hardclip_high);
}
+
+ if (Shortread_quality_string(queryseq) != NULL) {
+ FPRINTF(fp,"\tXI:Z:");
+ if (plusp == true) {
+ Shortread_print_chopped_end_quality(fp,queryseq,hardclip_low,hardclip_high);
+ } else {
+ Shortread_print_chopped_end_quality_reverse(fp,queryseq,hardclip_low,hardclip_high);
+ }
+ }
}
/* 12. TAGS: XB */
@@ -5025,170 +2936,107 @@ print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate
Shortread_print_chop(fp,queryseq,invertp);
/* 12. TAGS: MD */
- fprintf(fp,"\tMD:Z:");
+ FPRINTF(fp,"\tMD:Z:");
printp = false;
if (hide_soft_clips_p == true) {
- substringM_start = Substring_querystart_orig(substringM);
- substringM_length = Substring_match_length_orig(substringM);
- } else {
- substringM_start = Substring_querystart(substringM);
- substringM_length = Substring_match_length(substringM);
- }
-
- if (substring1 == NULL) {
- substring1_start = 0;
- substring1_length = 0;
- } else if (hide_soft_clips_p == true) {
- substring1_start = Substring_querystart_orig(substring1);
- substring1_length = Substring_match_length_orig(substring1);
- } else {
- substring1_start = Substring_querystart(substring1);
- substring1_length = Substring_match_length(substring1);
- }
- if (substring2 == NULL) {
- substring2_start = 0;
- substring2_length = 0;
- } else if (hide_soft_clips_p == true) {
- substring2_start = Substring_querystart_orig(substring2);
- substring2_length = Substring_match_length_orig(substring2);
+ substring_start = Substring_querystart_orig(acceptor);
+ substring_length = Substring_match_length_orig(acceptor);
} else {
- substring2_start = Substring_querystart(substring2);
- substring2_length = Substring_match_length(substring2);
+ substring_start = Substring_querystart(acceptor);
+ substring_length = Substring_match_length(acceptor);
}
- if (plusp == true) {
-
- if (substring1 == NULL) {
- matchlength = 0;
- } else {
- genomicfwd_refdiff = Substring_genomic_refdiff(substring1);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(substring1);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
- &(genomicfwd_refdiff[substring1_start]),&(genomicfwd_bothdiff[substring1_start]),
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- }
-
-#if 0
- /* Intron 1: Gets skipped in MD string */
- fprintf(fp,"^");
-#endif
-
- genomicfwd_refdiff = Substring_genomic_refdiff(substringM);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(substringM);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
- &(genomicfwd_refdiff[substringM_start]),&(genomicfwd_bothdiff[substringM_start]),
- substringM_length,/*querypos*/substringM_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
-
-#if 0
- /* Intron 2: Gets skipped in MD string */
- fprintf(fp,"^");
-#endif
-
- if (substring2 == NULL) {
- /* Equivalent: if (matchlength > 0) fprintf(fp,"%d",matchlength); */
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
- /*substring2_length*/0,/*querypos*/0,querylength,
+ if (use_hardclip_p == false) {
+ genomicdir_refdiff = Substring_genomic_refdiff(acceptor);
+ genomicdir_bothdiff = Substring_genomic_bothdiff(acceptor);
+ if (plusp == true) {
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ &(genomicdir_refdiff[substring_start]),&(genomicdir_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ } else if (genomicdir_bothdiff == genomicdir_refdiff) {
+ genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_refdiff);
} else {
- genomicfwd_refdiff = Substring_genomic_refdiff(substring2);
- genomicfwd_bothdiff = Substring_genomic_bothdiff(substring2);
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,matchlength,
- &(genomicfwd_refdiff[substring2_start]),&(genomicfwd_bothdiff[substring2_start]),
- substring2_length,/*querypos*/substring2_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
+ genomicfwd_refdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
+ genomicfwd_bothdiff = (char *) MALLOCA((querylength+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ FREEA(genomicfwd_bothdiff);
+ FREEA(genomicfwd_refdiff);
}
- } else {
-
- if (substring1 == NULL) {
- matchlength = 0;
+ } else if (sensep == false) {
+ if (plusp == true) {
+ genomicfwd_refdiff = Substring_genomic_refdiff(acceptor);
+ genomicfwd_bothdiff = Substring_genomic_bothdiff(acceptor);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
} else {
- genomicdir_refdiff = Substring_genomic_refdiff(substring1);
- genomicdir_bothdiff = Substring_genomic_bothdiff(substring1);
+ genomicdir_refdiff = Substring_genomic_refdiff(acceptor);
+ genomicdir_bothdiff = Substring_genomic_bothdiff(acceptor);
if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring1_start]),substring1_length);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
FREEA(genomicfwd_refdiff);
} else {
- genomicfwd_refdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring1_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring1_start]),substring1_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring1_start]),substring1_length);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring1_length,/*querypos*/substring1_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
+ fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
FREEA(genomicfwd_bothdiff);
FREEA(genomicfwd_refdiff);
}
- }
-#if 0
- /* Intron 1: Gets skipped in MD string */
- fprintf(fp,"^");
-#endif
-
- genomicdir_refdiff = Substring_genomic_refdiff(substringM);
- genomicdir_bothdiff = Substring_genomic_bothdiff(substringM);
- if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substringM_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substringM_start]),substringM_length);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substringM_length,/*querypos*/substringM_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- FREEA(genomicfwd_refdiff);
- } else {
- genomicfwd_refdiff = (char *) MALLOCA((substringM_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substringM_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substringM_start]),substringM_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substringM_start]),substringM_length);
- matchlength = print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,genomicfwd_refdiff,genomicfwd_bothdiff,
- substringM_length,/*querypos*/substringM_start,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- FREEA(genomicfwd_bothdiff);
- FREEA(genomicfwd_refdiff);
}
-#if 0
- /* Intron 2: Not sure how to handle in MD string */
- fprintf(fp,"^");
-#endif
-
- if (substring2 == NULL) {
- /* Equivalent: if (matchlength > 0) fprintf(fp,"%d",matchlength); */
- print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
- /*substring2_length*/0,/*querypos*/0,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
+ } else { /* sensep true */
+ if (plusp == true) {
+ genomicfwd_refdiff = Substring_genomic_refdiff(acceptor);
+ genomicfwd_bothdiff = Substring_genomic_bothdiff(acceptor);
+ print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,fp,/*matchlength*/0,
+ &(genomicfwd_refdiff[substring_start]),&(genomicfwd_bothdiff[substring_start]),
+ substring_length,/*querypos*/substring_start,querylength,
+ hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
} else {
- genomicdir_refdiff = Substring_genomic_refdiff(substring2);
- genomicdir_bothdiff = Substring_genomic_bothdiff(substring2);
+ genomicdir_refdiff = Substring_genomic_refdiff(acceptor);
+ genomicdir_bothdiff = Substring_genomic_bothdiff(acceptor);
if (genomicdir_bothdiff == genomicdir_refdiff) {
- genomicfwd_refdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring2_start]),substring2_length);
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
- substring2_length,/*querypos*/substring2_start,querylength,
+ fp,/*matchlength*/0,genomicfwd_refdiff,/*genomicfwd_bothdiff*/genomicfwd_refdiff,
+ substring_length,/*querypos*/substring_start,querylength,
hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
FREEA(genomicfwd_refdiff);
} else {
- genomicfwd_refdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- genomicfwd_bothdiff = (char *) MALLOCA((substring2_length+1) * sizeof(char));
- make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring2_start]),substring2_length);
- make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring2_start]),substring2_length);
+ genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ genomicfwd_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+ make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+ make_complement_buffered(genomicfwd_bothdiff,&(genomicdir_bothdiff[substring_start]),substring_length);
print_md_string(&printp,&nmismatches_refdiff,&nmismatches_bothdiff,
- fp,matchlength,genomicfwd_refdiff,genomicfwd_bothdiff,
- substring2_length,/*querypos*/substring2_start,querylength,
+ fp,/*matchlength*/0,genomicfwd_refdiff,genomicfwd_bothdiff,
+ substring_length,/*querypos*/substring_start,querylength,
hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
FREEA(genomicfwd_bothdiff);
FREEA(genomicfwd_refdiff);
@@ -5196,312 +3044,143 @@ print_shortexon (FILE *fp, char *abbrev, Stage3end_T shortexon, Stage3end_T mate
}
}
if (printp == false) {
- fprintf(fp,"0");
+ FPRINTF(fp,"0");
}
/* 12. TAGS: NH */
- fprintf(fp,"\tNH:i:%d",npaths);
-
/* 12. TAGS: HI */
- fprintf(fp,"\tHI:i:%d",pathnum);
-
/* 12. TAGS: NM */
- /* fprintf(fp,"\tNM:i:%d",Stage3end_nmismatches_refdiff(shortexon)); */
- fprintf(fp,"\tNM:i:%d",nmismatches_refdiff);
+ FPRINTF(fp,"\tNH:i:%d\tHI:i:%d\tNM:i:%d",npaths,pathnum,nmismatches_refdiff);
if (snps_iit) {
/* 12. TAGS: XW and XV */
- fprintf(fp,"\tXW:i:%d",nmismatches_bothdiff);
- fprintf(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
+ FPRINTF(fp,"\tXW:i:%d",nmismatches_bothdiff);
+ FPRINTF(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
}
/* 12. TAGS: SM */
- fprintf(fp,"\tSM:i:%d",mapq_score);
-
/* 12. TAGS: XQ */
- fprintf(fp,"\tXQ:i:%d",absmq_score);
-
/* 12. TAGS: X2 */
- fprintf(fp,"\tX2:i:%d",second_absmq);
+ FPRINTF(fp,"\tSM:i:%d\tXQ:i:%d\tX2:i:%d",mapq_score,absmq_score,second_absmq);
/* 12. TAGS: XO */
- fprintf(fp,"\tXO:Z:%s",abbrev);
+ FPRINTF(fp,"\tXO:Z:%s",abbrev);
/* 12. TAGS: XS */
- if (sensedir == SENSE_FORWARD) {
- if (plusp == true) {
- fprintf(fp,"\tXS:A:+");
- } else {
- fprintf(fp,"\tXS:A:-");
- }
- } else if (sensedir == SENSE_ANTI) {
- if (plusp == true) {
- fprintf(fp,"\tXS:A:-");
- } else {
- fprintf(fp,"\tXS:A:+");
- }
- } else if (force_xs_direction_p == true) {
- fprintf(fp,"\tXS:A:+");
- } else {
- fprintf(fp,"\tXS:A:?");
- }
+ assert(acceptor_sensedir != SENSE_NULL);
+ FPRINTF(fp,"\tXS:A:%c",acceptor_strand);
/* 12. TAGS: XA */
- if ((start_ambig = Stage3end_start_ambiguous_p(shortexon)) == true ||
- (end_ambig = Stage3end_end_ambiguous_p(shortexon)) == true) {
- fprintf(fp,"\tXA:Z:");
+ if ((start_ambig = Stage3end_start_ambiguous_p(this)) == true ||
+ (end_ambig = Stage3end_end_ambiguous_p(this)) == true) {
+ FPRINTF(fp,"\tXA:Z:");
if (plusp == true) {
- if ((n = Stage3end_start_nambcoords(shortexon)) > 0) {
- start_ambcoords = Stage3end_start_ambcoords(shortexon);
+ if ((n = Stage3end_start_nambcoords(this)) > 0) {
+ assert(sensep == true);
+ start_ambcoords = Stage3end_start_ambcoords(this);
#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(substringM);
- fprintf(fp,"%u",start_ambcoords[0] - chroffset + 1U);
+ chroffset = Substring_chroffset(acceptor);
+ FPRINTF(fp,"%u",start_ambcoords[0] - chroffset + 1U);
for (i = 1; i < n; i++) {
- fprintf(fp,",%u",start_ambcoords[i] - chroffset + 1U);
+ FPRINTF(fp,",%u",start_ambcoords[i] - chroffset + 1U);
}
#else
- splicecoord = Substring_alignstart(substringM);
- fprintf(fp,"%u",splicecoord - start_ambcoords[0]);
+ splicecoord = Substring_alignstart(acceptor);
+ FPRINTF(fp,"%u",splicecoord - start_ambcoords[0]);
for (i = 1; i < n; i++) {
- fprintf(fp,",%u",splicecoord - start_ambcoords[i]);
+ FPRINTF(fp,",%u",splicecoord - start_ambcoords[i]);
}
#endif
}
- fprintf(fp,"|");
- if ((n = Stage3end_end_nambcoords(shortexon)) > 0) {
- end_ambcoords = Stage3end_end_ambcoords(shortexon);
+ FPRINTF(fp,"|");
+ if ((n = Stage3end_end_nambcoords(this)) > 0) {
+ assert(sensep == false);
+ end_ambcoords = Stage3end_end_ambcoords(this);
#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(substringM);
- fprintf(fp,"%u",end_ambcoords[0] - chroffset + 1U);
+ chroffset = Substring_chroffset(acceptor);
+ FPRINTF(fp,"%u",end_ambcoords[0] - chroffset + 1U);
for (i = 1; i < n; i++) {
- fprintf(fp,",%u",end_ambcoords[i] - chroffset + 1U);
+ FPRINTF(fp,",%u",end_ambcoords[i] - chroffset + 1U);
}
#else
- splicecoord = Substring_alignend(substringM);
- fprintf(fp,"%u",end_ambcoords[0] - splicecoord);
+ splicecoord = Substring_alignend(acceptor);
+ FPRINTF(fp,"%u",end_ambcoords[0] - splicecoord);
for (i = 1; i < n; i++) {
- fprintf(fp,",%u",end_ambcoords[i] - splicecoord);
+ FPRINTF(fp,",%u",end_ambcoords[i] - splicecoord);
}
#endif
}
} else {
- if ((n = Stage3end_end_nambcoords(shortexon)) > 0) {
- end_ambcoords = Stage3end_end_ambcoords(shortexon);
+ if ((n = Stage3end_end_nambcoords(this)) > 0) {
+ assert(sensep == false);
+ end_ambcoords = Stage3end_end_ambcoords(this);
#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(substringM);
- fprintf(fp,"%u",end_ambcoords[0] - chroffset + 1U);
+ chroffset = Substring_chroffset(acceptor);
+ FPRINTF(fp,"%u",end_ambcoords[0] - chroffset + 1U);
for (i = 1; i < n; i++) {
- fprintf(fp,",%u",end_ambcoords[i] - chroffset + 1U);
+ FPRINTF(fp,",%u",end_ambcoords[i] - chroffset + 1U);
}
#else
- splicecoord = Substring_alignend(substringM);
- fprintf(fp,"%u",splicecoord - end_ambcoords[0]);
+ splicecoord = Substring_alignend(acceptor);
+ FPRINTF(fp,"%u",splicecoord - end_ambcoords[0]);
for (i = 1; i < n; i++) {
- fprintf(fp,",%u",splicecoord - end_ambcoords[i]);
+ FPRINTF(fp,",%u",splicecoord - end_ambcoords[i]);
}
#endif
}
- fprintf(fp,"|");
- if ((n = Stage3end_start_nambcoords(shortexon)) > 0) {
- start_ambcoords = Stage3end_start_ambcoords(shortexon);
+ FPRINTF(fp,"|");
+ if ((n = Stage3end_start_nambcoords(this)) > 0) {
+ assert(sensep == true);
+ start_ambcoords = Stage3end_start_ambcoords(this);
#ifdef PRINT_AMBIG_COORDS
- chroffset = Substring_chroffset(substringM);
- fprintf(fp,"%u",start_ambcoords[0] - chroffset + 1U);
+ chroffset = Substring_chroffset(acceptor);
+ FPRINTF(fp,"%u",start_ambcoords[0] - chroffset + 1U);
for (i = 1; i < n; i++) {
- fprintf(fp,",%u",start_ambcoords[i] - chroffset + 1U);
+ FPRINTF(fp,",%u",start_ambcoords[i] - chroffset + 1U);
}
#else
- splicecoord = Substring_alignstart(substringM);
- fprintf(fp,"%u",start_ambcoords[0] - splicecoord);
+ splicecoord = Substring_alignstart(acceptor);
+ FPRINTF(fp,"%u",start_ambcoords[0] - splicecoord);
for (i = 1; i < n; i++) {
- fprintf(fp,",%u",start_ambcoords[i] - splicecoord);
+ FPRINTF(fp,",%u",start_ambcoords[i] - splicecoord);
}
#endif
}
}
}
- /* 12. TAGS: XC */
- if (circularp == true) {
- fprintf(fp,"\tXC:A:+");
- }
-
- /* 12. TAGS: XG */
- if (Stage3end_sarrayp(shortexon) == true) {
- fprintf(fp,"\tXG:Z:A");
- }
-
- fprintf(fp,"\n");
- return;
-}
-
-
-static bool
-check_cigar_shortexon (Stage3end_T shortexon, Stage3end_T mate, int querylength, int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, bool circularp) {
- bool result;
- Intlist_T cigar_types = NULL;
- Substring_T substring1, substring2, substringM;
- bool plusp, sensep;
- int sensedir;
-
- plusp = Stage3end_plusp(shortexon);
-
- if ((sensedir = Stage3end_sensedir(shortexon)) == SENSE_NULL) {
- sensedir = Stage3end_sensedir(mate);
- }
- sensep = (sensedir == SENSE_FORWARD);
-
- substringM = Stage3end_substring1(shortexon);
-
- if (sensep == plusp) {
- substring1 = /* donor */ Stage3end_substringD(shortexon);
- substring2 = /* acceptor */ Stage3end_substringA(shortexon);
- } else {
- substring1 = /* acceptor */ Stage3end_substringA(shortexon);
- substring2 = /* donor */ Stage3end_substringD(shortexon);
- }
-
- if (substring1 == NULL) {
- if (plusp == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substringM),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substringM),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- }
-
- } else if (plusp == true) {
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_querystart(substring1) +
- Substring_match_length(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring1),
- /*querypos*/0,querylength,hardclip_low,hardclip_high,
- /*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_querystart(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- }
- if (hardclip_low < Substring_queryend(substring1) &&
- querylength - hardclip_high > Substring_querystart(substringM)) {
- debug1(printf("\ncase 3: hardclip_low %d < queryend(substring1) %d && querylength %d - hardclip_high %d > querystart(substringM) %d\n",
- hardclip_low,Substring_queryend(substring1),querylength,hardclip_high,Substring_querystart(substringM)));
- cigar_types = Intlist_push(cigar_types,'N');
- }
-
- } else {
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- (querylength - Substring_queryend(substring1)) +
- Substring_match_length(substring1),
- /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/false);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring1),
- /*querypos*/querylength,querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring1),
- /*querypos*/Substring_queryend(substring1),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- }
- if (querylength - hardclip_low > Substring_queryend(substringM) &&
- hardclip_high < Substring_querystart(substring1)) {
- debug1(printf("\ncase 4: querylength %d - hardclip_low %d > queryend(substringM) %d && hardclip_high %d < querystart(substring1) %d\n",
- querylength,hardclip_low,Substring_queryend(substringM),hardclip_high,Substring_querystart(substring1)));
- cigar_types = Intlist_push(cigar_types,'N');
- }
- }
-
- if (plusp == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substringM),
- /*querypos*/Substring_querystart(substringM),querylength,
- hardclip_low,hardclip_high,plusp,/*lastp*/false);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substringM),
- /*querypos*/Substring_queryend(substringM),querylength,
- hardclip_low,hardclip_high,plusp,/*lastp*/false);
+ /* 12. TAGS: XT */
+ if (print_xt_p == true) {
+ FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
+ FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_chrpos,acceptor_strand,acceptor_chr,acceptor_chrpos);
}
- if (substring2 == NULL) {
- if (plusp == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substringM),
- /*querypos*/Substring_queryend(substringM),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substringM),
- /*querypos*/Substring_querystart(substringM),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- }
-
- } else if (plusp == true) {
- if (hardclip_low < Substring_queryend(substringM) &&
- querylength - hardclip_high > Substring_querystart(substring2)) {
- debug1(printf("\ncase 5: hardclip_low %d < queryend(substringM) %d && querylength %d - hardclip_high %d > querystart(substring2) %d\n",
- hardclip_low,Substring_queryend(substringM),querylength,hardclip_high,Substring_querystart(substring2)));
- cigar_types = Intlist_push(cigar_types,'N');
- }
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(substring2) +
- (querylength - Substring_queryend(substring2)),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',querylength - Substring_queryend(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true);
- }
- } else {
- if (querylength - hardclip_low > Substring_queryend(substring2) &&
- hardclip_high < Substring_querystart(substringM)) {
- debug1(printf("\ncase 6: querylength %d - hardclip_low %d > queryend(substring2) %d && hardclip_high %d < querystart(substringM) %d\n",
- querylength,hardclip_low,Substring_queryend(substring2),querylength,Substring_querystart(substringM)));
- cigar_types = Intlist_push(cigar_types,'N');
- }
- if (hide_soft_clips_p == true) {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',
- Substring_match_length(substring2) +
- Substring_querystart(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true);
- } else {
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'M',Substring_match_length(substring2),
- /*querypos*/Substring_queryend(substring2),querylength,
- hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false);
- cigar_types = compute_cigar_types_only(cigar_types,/*type*/'S',Substring_querystart(substring2),
- /*querypos*/Substring_querystart(substring2),querylength,hardclip_low,hardclip_high,
- /*plusp*/false,/*lastp*/true);
- }
+ /* 12. TAGS: XC */
+ if (circularp == true) {
+ FPRINTF(fp,"\tXC:A:+");
}
- result = check_cigar_types(cigar_types);
+ /* 12. TAGS: XG */
+ if (Stage3end_sarrayp(this) == true) {
+ FPRINTF(fp,"\tXG:Z:A");
+ }
- Intlist_free(&cigar_types);
- return result;
+ FPRINTF(fp,"\n");
+ return;
}
-
/* Distant splicing, including scramble, inversion, translocation */
static void
-print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
+print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
- Chrpos_T mate_chrpos, int clipdir, int hardclip_low, int hardclip_high,
+ Chrpos_T mate_chrpos, int hardclip_low, int hardclip_high,
Resulttype_T resulttype, bool first_read_p, int npaths_mate,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
Chrpos_T donor_chrpos, acceptor_chrpos, concordant_chrpos;
@@ -5511,87 +3190,66 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
double donor_prob, acceptor_prob;
int circularpos, querylength;
char donor_strand, acceptor_strand;
- int sensedir;
+ int sensedir, donor_sensedir, acceptor_sensedir;
bool allocp;
+ debug(printf("Entered print_exon_exon with hardclip_low %d, and hardclip_high %d\n",
+ hardclip_low,hardclip_high));
+
+ sensedir = Stage3end_sensedir(this);
donor = Stage3end_substring_donor(this);
acceptor = Stage3end_substring_acceptor(this);
-#if 0
- if (first_read_p == true) {
- hardclip_low = 0;
- hardclip_high = hardclip5;
- } else {
- hardclip_low = hardclip3;
- hardclip_high = 0;
- }
-#else
+ querylength = Shortread_fulllength(queryseq);
+
/* Shouldn't have any overlap on a distant splice */
hardclip_low = hardclip_high = 0;
-#endif
- querylength = Shortread_fulllength(queryseq);
- donor_chrpos = SAM_compute_chrpos(hardclip_low,hardclip_high,this,querylength);
- acceptor_chrpos = SAM_compute_chrpos(hardclip_low,hardclip_high,this,querylength);
- if (Stage3end_substring_low(this,hardclip_low) == donor) {
- concordant_chrpos = donor_chrpos;
- } else if (Stage3end_substring_low(this,hardclip_low) == acceptor) {
- concordant_chrpos = acceptor_chrpos;
- } else {
-#if 0
- fprintf(stderr,"Stage3end_substring_low %p is neither donor %p or acceptor %p\n",
- Stage3end_substring_low(this),donor,acceptor);
-#endif
- concordant_chrpos = 0U;
- }
+ donor_chrpos = Substring_compute_chrpos(donor,hardclip_low,hide_soft_clips_p);
+ acceptor_chrpos = Substring_compute_chrpos(acceptor,hardclip_low,hide_soft_clips_p);
- halfdonor_dinucleotide(&donor1,&donor2,donor);
- halfacceptor_dinucleotide(&acceptor2,&acceptor1,acceptor);
+ halfdonor_dinucleotide(&donor1,&donor2,donor,sensedir);
+ halfacceptor_dinucleotide(&acceptor2,&acceptor1,acceptor,sensedir);
donor_chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(donor),&allocp);
acceptor_chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(acceptor),&allocp);
donor_prob = Substring_chimera_prob(donor);
acceptor_prob = Substring_chimera_prob(acceptor);
/* Code taken from that for XS tag for print_halfdonor and print_halfacceptor */
- if ((sensedir = Substring_chimera_sensedir(donor)) == SENSE_FORWARD) {
+ /* For the donor and acceptor strands, use the substring sensedir and not the Stage3end_T sensedir */
+ if ((donor_sensedir = Substring_chimera_sensedir(donor)) == SENSE_FORWARD) {
if (Substring_plusp(donor) == true) {
donor_strand = '+';
} else {
donor_strand = '-';
}
- } else if (sensedir == SENSE_ANTI) {
+ } else if (donor_sensedir == SENSE_ANTI) {
if (Substring_plusp(donor) == true) {
donor_strand = '-';
} else {
donor_strand = '+';
}
- } else if (force_xs_direction_p == true) {
- donor_strand = '+';
} else {
- donor_strand = '?';
+ abort();
}
- /* Code taken from that for XS tag for print_halfdonor and print_halfacceptor */
- if ((sensedir = Substring_chimera_sensedir(acceptor)) == SENSE_FORWARD) {
+ if ((acceptor_sensedir = Substring_chimera_sensedir(acceptor)) == SENSE_FORWARD) {
if (Substring_plusp(acceptor) == true) {
acceptor_strand = '+';
} else {
acceptor_strand = '-';
}
- } else if (sensedir == SENSE_ANTI) {
+ } else if (acceptor_sensedir == SENSE_ANTI) {
if (Substring_plusp(acceptor) == true) {
acceptor_strand = '-';
} else {
acceptor_strand = '+';
}
- } else if (force_xs_direction_p == true) {
- acceptor_strand = '+';
} else {
- acceptor_strand = '?';
+ abort();
}
-
- if (Stage3end_sensedir(this) == SENSE_FORWARD) {
+ if (sensedir == SENSE_FORWARD) {
/* NEEDS WORK: Need to decide whether to split halfdonor or halfacceptor */
/* Not sure if circular chromosomes should participate in distant splicing anyway */
@@ -5600,20 +3258,20 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,/*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ /*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
@@ -5621,10 +3279,10 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- clipdir,hardclip_low,hardclip_high,resulttype,first_read_p,
+ hardclip_low,hardclip_high,resulttype,first_read_p,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/false);
}
@@ -5634,20 +3292,20 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ /*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
@@ -5655,10 +3313,10 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- clipdir,hardclip_low,hardclip_high,resulttype,first_read_p,
+ hardclip_low,hardclip_high,resulttype,first_read_p,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/false);
}
@@ -5669,20 +3327,20 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ /*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
@@ -5690,10 +3348,10 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- clipdir,hardclip_low,hardclip_high,resulttype,first_read_p,
+ hardclip_low,hardclip_high,resulttype,first_read_p,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/false);
}
@@ -5703,20 +3361,20 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
/*concordant_chrpos*/1,/*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ /*hardclip_low*/circularpos,/*hardclip_high*/0,
resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/true);
} else {
@@ -5724,16 +3382,83 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
absmq_score,first_absmq,second_absmq,mapq_score,
chromosome_iit,queryseq,pairedlength,
concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
- clipdir,hardclip_low,hardclip_high,resulttype,first_read_p,
+ hardclip_low,hardclip_high,resulttype,first_read_p,
npaths_mate,quality_shift,sam_read_group_id,
invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
- donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
/*circularp*/false);
}
} else {
- abort();
+ /* SENSE_NULL (DNA distant chimera) */
+ if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
+ print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,
+ chromosome_iit,queryseq,pairedlength,
+ concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
+ /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
+ acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
+ /*circularp*/true);
+ print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,
+ chromosome_iit,queryseq,pairedlength,
+ /*concordant_chrpos*/1,donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
+ /*hardclip_low*/circularpos,/*hardclip_high*/0,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
+ acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
+ /*circularp*/true);
+ } else {
+ print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,
+ chromosome_iit,queryseq,pairedlength,
+ concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
+ hardclip_low,hardclip_high,resulttype,first_read_p,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
+ acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
+ /*circularp*/false);
+ }
+
+ if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
+ print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,
+ chromosome_iit,queryseq,pairedlength,
+ concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
+ /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
+ donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
+ /*circularp*/true);
+ print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,
+ chromosome_iit,queryseq,pairedlength,
+ /*concordant_chrpos*/1,/*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
+ /*hardclip_low*/circularpos,/*hardclip_high*/0,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
+ donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
+ /*circularp*/true);
+ } else {
+ print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,
+ chromosome_iit,queryseq,pairedlength,
+ concordant_chrpos,donor_chrpos,acceptor_chrpos,mate_chrpos,
+ hardclip_low,hardclip_high,resulttype,first_read_p,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
+ donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
+ donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
+ /*circularp*/false);
+ }
}
if (allocp == true) {
@@ -5747,8 +3472,8 @@ print_exon_exon (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
void
-SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
- char *acc1, char *acc2, int pathnum, int npaths,
+SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
+ Stage3end_T this, Stage3end_T mate, char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
Shortread_T queryseq_mate, int pairedlength, Chrpos_T chrpos, Chrpos_T mate_chrpos,
int clipdir, int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high,
@@ -5756,12 +3481,8 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
int npaths_mate, int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
bool merge_samechr_p) {
Hittype_T hittype;
- Substring_T donor, acceptor;
- bool sensep, normalp;
unsigned int flag;
int circularpos, querylength;
- char donor_strand, acceptor_strand;
- int sensedir;
hittype = Stage3end_hittype(this);
@@ -5774,412 +3495,33 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
/*npaths*/0,npaths_mate,mate_chrpos,quality_shift,
sam_read_group_id,invertp,invert_mate_p);
- if (failedinput_root != NULL) {
- if (fastq_format_p == true) {
- if (first_read_p == true) {
- Shortread_print_query_singleend_fastq(fp_failedinput_1,queryseq,/*headerseq*/queryseq);
- } else {
- Shortread_print_query_singleend_fastq(fp_failedinput_1,queryseq,/*headerseq*/queryseq_mate);
- }
- } else {
- if (first_read_p == true) {
- Shortread_print_query_singleend_fasta(fp_failedinput_2,queryseq,/*headerseq*/queryseq);
- } else {
- Shortread_print_query_singleend_fasta(fp_failedinput_2,queryseq,/*headerseq*/queryseq_mate);
- }
- }
- }
-
- } else if (hittype == EXACT || hittype == SUB || hittype == TERMINAL) {
- querylength = Shortread_fulllength(queryseq);
- if ((circularpos = Stage3end_circularpos(this)) > 0 &&
- check_cigar_single(hittype,this,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- first_read_p,/*circularp*/true) == true &&
- check_cigar_single(hittype,this,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- first_read_p,/*circularp*/true) == true) {
-#ifdef CHECK_ASSERTIONS
- if (Stage3end_plusp(this) == true) {
- assert(chrpos-Stage3end_trim_left(this)+circularpos-Stage3end_chrlength(this) == 1);
- } else {
- assert(chrpos-Stage3end_trim_right(this)+circularpos-Stage3end_chrlength(this) == 1);
- }
-#endif
- print_single(fp,abbrev,hittype,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- print_single(fp,abbrev,hittype,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- } else if (first_read_p == true) {
- print_single(fp,abbrev,hittype,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- } else {
- print_single(fp,abbrev,hittype,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- }
-
- } else if (hittype == INSERTION) {
- querylength = Shortread_fulllength(queryseq);
- if ((circularpos = Stage3end_circularpos(this)) > 0 &&
- check_cigar_insertion(this,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- first_read_p,/*circularp*/true) == true &&
- check_cigar_insertion(this,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- first_read_p,/*circularp*/true) == true) {
- print_insertion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- print_insertion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- } else if (first_read_p == true) {
- print_insertion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- } else {
- print_insertion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- }
-
- } else if (hittype == DELETION) {
- querylength = Shortread_fulllength(queryseq);
- if ((circularpos = Stage3end_circularpos(this)) > 0 &&
- check_cigar_deletion(this,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- first_read_p,/*circularp*/true) == true &&
- check_cigar_deletion(this,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- first_read_p,/*circularp*/true) == true) {
- print_deletion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- print_deletion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- } else if (first_read_p == true) {
- print_deletion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- } else {
- print_deletion(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- }
-
- } else if (hittype == HALFSPLICE_DONOR) {
- donor = Stage3end_substring_donor(this);
-
- /* Code taken from that for XS tag for print_halfdonor and print_halfacceptor */
- if ((sensedir = Substring_chimera_sensedir(donor)) == SENSE_FORWARD) {
- if (Substring_plusp(donor) == true) {
- donor_strand = '+';
- } else {
- donor_strand = '-';
- }
- } else if (sensedir == SENSE_ANTI) {
- if (Substring_plusp(donor) == true) {
- donor_strand = '-';
- } else {
- donor_strand = '+';
- }
- } else if (force_xs_direction_p == true) {
- donor_strand = '+';
- } else {
- donor_strand = '?';
- }
-
- querylength = Shortread_fulllength(queryseq);
- if ((circularpos = Stage3end_circularpos(this)) > 0 &&
- check_cigar_halfdonor(donor,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- first_read_p,/*circularp*/true) == true &&
- check_cigar_halfdonor(donor,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- first_read_p,/*circularp*/true) == true) {
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*concordant_chrpos*/chrpos,chrpos,/*acceptor_chrpos*/-1U,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
- donor_strand,/*acceptor_strand*/'\0',/*donor_chr*/NULL,/*acceptor_chr*/NULL,
- /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
- /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/true);
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*concordant_chrpos*/1,/*chrpos*/1,/*acceptor_chrpos*/-1U,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
- donor_strand,/*acceptor_strand*/'\0',/*donor_chr*/NULL,/*acceptor_chr*/NULL,
- /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
- /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/true);
- } else if (first_read_p == true) {
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*concordant_chrpos*/chrpos,chrpos,/*acceptor_chrpos*/-1U,mate_chrpos,
- clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
- donor_strand,/*acceptor_strand*/'\0',/*donor_chr*/NULL,/*acceptor_chr*/NULL,
- /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
- /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/false);
- } else {
- print_halfdonor(fp,abbrev,donor,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*concordant_chrpos*/chrpos,chrpos,/*acceptor_chrpos*/-1U,mate_chrpos,
- clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
- donor_strand,/*acceptor_strand*/'\0',/*donor_chr*/NULL,/*acceptor_chr*/NULL,
- /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
- /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/false);
- }
-
- } else if (hittype == HALFSPLICE_ACCEPTOR) {
- acceptor = Stage3end_substring_acceptor(this);
-
- /* Code taken from that for XS tag for print_halfdonor and print_halfacceptor */
- if ((sensedir = Substring_chimera_sensedir(acceptor)) == SENSE_FORWARD) {
- if (Substring_plusp(acceptor) == true) {
- acceptor_strand = '+';
- } else {
- acceptor_strand = '-';
- }
- } else if (sensedir == SENSE_ANTI) {
- if (Substring_plusp(acceptor) == true) {
- acceptor_strand = '-';
- } else {
- acceptor_strand = '+';
- }
- } else if (force_xs_direction_p == true) {
- acceptor_strand = '+';
- } else {
- acceptor_strand = '?';
- }
-
- querylength = Shortread_fulllength(queryseq);
- if ((circularpos = Stage3end_circularpos(this)) > 0 &&
- check_cigar_halfacceptor(acceptor,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- first_read_p,/*circularp*/true) == true &&
- check_cigar_halfacceptor(acceptor,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- first_read_p,/*circularp*/true) == true) {
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*concordant_chrpos*/chrpos,/*donor_chrpos*/-1U,chrpos,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
- /*donor_strand*/'\0',acceptor_strand,/*donor_chr*/NULL,/*acceptor_chr*/NULL,
- /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
- /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/true);
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*concordant_chrpos*/1,/*donor_chrpos*/-1U,/*chrpos*/1,mate_chrpos,
- /*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
- /*donor_strand*/'\0',acceptor_strand,/*donor_chr*/NULL,/*acceptor_chr*/NULL,
- /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
- /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/true);
- } else if (first_read_p == true) {
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*concordant_chrpos*/chrpos,/*donor_chrpos*/-1U,chrpos,mate_chrpos,
- clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
- /*donor_strand*/'\0',acceptor_strand,/*donor_chr*/NULL,/*acceptor_chr*/NULL,
- /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
- /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/false);
- } else {
- print_halfacceptor(fp,abbrev,acceptor,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*concordant_chrpos*/chrpos,/*donor_chrpos*/-1U,chrpos,mate_chrpos,
- clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*use_hardclip_p*/false,/*print_xt_p*/false,
- /*donor_strand*/'\0',acceptor_strand,/*donor_chr*/NULL,/*acceptor_chr*/NULL,
- /*donor1*/'X',/*donor2*/'X',/*acceptor2*/'X',/*acceptor1*/'X',
- /*donor_prob*/0.0,/*acceptor_prob*/0.0,/*circularp*/false);
- }
-
- } else if (hittype == SPLICE || hittype == SAMECHR_SPLICE || hittype == TRANSLOC_SPLICE) {
- /* Follows print_splice_distance() in substring.c */
- donor = Stage3end_substring_donor(this);
- acceptor = Stage3end_substring_acceptor(this);
-
- if (donor == NULL || acceptor == NULL) {
- abort();
- } else if (hittype == TRANSLOC_SPLICE || (hittype == SAMECHR_SPLICE && merge_samechr_p == false)) {
- /* Stage3end_chrnum(this) == 0 || Stage3end_distance(this) == 0U */
- /* distant splice */
+ if (fp_failedinput != NULL) {
if (first_read_p == true) {
- print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- mate_chrpos,clipdir,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p);
- } else {
- print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- mate_chrpos,clipdir,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
- npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p);
- }
- } else {
- normalp = true;
- sensep = (Stage3end_sensedir(this) == SENSE_FORWARD);
-
- if (Substring_plusp(donor) != Substring_plusp(acceptor)) {
- /* inversion */
- normalp = false;
- } else if (Substring_plusp(donor) == true) {
- if (sensep == true) {
- if (Substring_genomicstart(acceptor) < Substring_genomicstart(donor)) {
- /* scramble */
- normalp = false;
- }
- } else {
- if (Substring_genomicstart(donor) < Substring_genomicstart(acceptor)) {
- /* scramble */
- normalp = false;
- }
- }
- } else {
- if (sensep == true) {
- if (Substring_genomicstart(donor) < Substring_genomicstart(acceptor)) {
- /* scramble */
- normalp = false;
- }
- } else {
- if (Substring_genomicstart(acceptor) < Substring_genomicstart(donor)) {
- /* scramble */
- normalp = false;
- }
- }
- }
-
- if (normalp == true) {
- querylength = Shortread_fulllength(queryseq);
- if ((circularpos = Stage3end_circularpos(this)) > 0 &&
- check_cigar_localsplice(this,mate,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- first_read_p,/*circularp*/true) == true &&
- check_cigar_localsplice(this,mate,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- first_read_p,/*circularp*/true) == true) {
- print_localsplice(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- print_localsplice(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- } else if (first_read_p == true) {
- print_localsplice(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,
- resulttype,/*first_read_p*/true,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- } else {
- print_localsplice(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,
- resulttype,/*first_read_p*/false,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- }
-
+ Shortread_print_query_singleend(fp_failedinput,queryseq,/*headerseq*/queryseq);
} else {
- if (first_read_p == true) {
- print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- mate_chrpos,clipdir,hardclip5_low,hardclip5_high,
- resulttype,/*first_read_p*/true,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p);
- } else {
- print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- mate_chrpos,clipdir,hardclip3_low,hardclip3_high,
- resulttype,/*first_read_p*/false,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p);
- }
+ Shortread_print_query_singleend(fp_failedinput,queryseq,/*headerseq*/queryseq_mate);
}
}
-
- } else if (hittype == ONE_THIRD_SHORTEXON || hittype == TWO_THIRDS_SHORTEXON || hittype == SHORTEXON) {
- querylength = Shortread_fulllength(queryseq);
- if ((circularpos = Stage3end_circularpos(this)) > 0 &&
- check_cigar_shortexon(this,mate,querylength,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- first_read_p,/*circularp*/true) == true &&
- check_cigar_shortexon(this,mate,querylength,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- first_read_p,/*circularp*/true) == true) {
- print_shortexon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- print_shortexon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- /*chrpos*/1,mate_chrpos,/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/true);
- } else if (first_read_p == true) {
- print_shortexon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip5_low,hardclip5_high,
- resulttype,/*first_read_p*/true,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- } else {
- print_shortexon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
- absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
- chrpos,mate_chrpos,clipdir,hardclip3_low,hardclip3_high,
- resulttype,/*first_read_p*/false,npaths_mate,quality_shift,sam_read_group_id,
- invertp,invert_mate_p,/*circularp*/false);
- }
} else if (hittype == GMAP) {
/* Note: sam_paired_p must be true because we are calling GMAP only on halfmapping uniq */
if (mate == NULL) {
chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,
- this,Shortread_fulllength(queryseq));
+ this,Shortread_fulllength(queryseq),/*first_read_p*/true);
mate_chrpos = 0U;
hardclip3_low = hardclip3_high = 0;
} else if (first_read_p == true) {
chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
- this,Shortread_fulllength(queryseq));
+ this,Shortread_fulllength(queryseq),/*first_read_p*/true);
mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
- mate,Shortread_fulllength(queryseq_mate));
+ mate,Shortread_fulllength(queryseq_mate),/*first_read_p*/false);
} else {
chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
- this,Shortread_fulllength(queryseq));
+ this,Shortread_fulllength(queryseq),/*first_read_p*/false);
mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
- mate,Shortread_fulllength(queryseq_mate));
+ mate,Shortread_fulllength(queryseq_mate),/*first_read_p*/true);
}
flag = SAM_compute_flag(Stage3end_plusp(this),mate,resulttype,first_read_p,
@@ -6187,64 +3529,120 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
invertp,invert_mate_p);
querylength = Shortread_fulllength(queryseq);
- if ((circularpos = Stage3end_circularpos(this)) > 0 &&
- Pair_check_cigar(Stage3end_pairarray(this),Stage3end_npairs(this),querylength,
- /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
- /*watsonp*/Stage3end_plusp(this),Stage3end_cdna_direction(this),
- first_read_p,/*circularp*/true) == true &&
+ if ((circularpos = Stage3end_circularpos(this)) > 0
+#if 0
+ && Pair_check_cigar(Stage3end_pairarray(this),Stage3end_npairs(this),querylength,
+ /*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ /*watsonp*/Stage3end_plusp(this),Stage3end_sensedir(this),
+ first_read_p,/*circularp*/true) == true &&
Pair_check_cigar(Stage3end_pairarray(this),Stage3end_npairs(this),querylength,
/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,
- /*watsonp*/Stage3end_plusp(this),Stage3end_cdna_direction(this),
- first_read_p,/*circularp*/true) == true) {
+ /*watsonp*/Stage3end_plusp(this),Stage3end_sensedir(this),
+ first_read_p,/*circularp*/true) == true
+#endif
+ ) {
Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
+ Stage3end_cigar_tokens(this),Stage3end_gmap_intronp(this),
acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
/*clipdir*/+1,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,Shortread_fulllength(queryseq),
- /*watsonp*/Stage3end_plusp(this),Stage3end_cdna_direction(this),
+ /*watsonp*/Stage3end_plusp(this),Stage3end_sensedir(this),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,first_read_p,
pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,Stage3end_chrlength(this),
queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
- mate_chrpos,Stage3end_chrlength(mate),/*mate_cdna_direction*/Stage3end_cdna_direction(mate),
- pairedlength,sam_read_group_id,invertp,/*circularp*/true,/*merged_overlap_p*/false);
+ mate_chrpos,Stage3end_chrlength(mate),/*mate_sensedir*/Stage3end_sensedir(mate),
+ pairedlength,sam_read_group_id,invertp,/*circularp*/true,/*merged_overlap_p*/false,
+ Stage3end_sarrayp(this));
Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
+ Stage3end_cigar_tokens(this),Stage3end_gmap_intronp(this),
acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
/*clipdir*/+1,/*hardclip_low*/circularpos,/*hardclip_high*/0,Shortread_fulllength(queryseq),
- /*watsonp*/Stage3end_plusp(this),Stage3end_cdna_direction(this),
+ /*watsonp*/Stage3end_plusp(this),Stage3end_sensedir(this),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,first_read_p,
pathnum,npaths,absmq_score,first_absmq,second_absmq,/*chrpos*/1,Stage3end_chrlength(this),
queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
- mate_chrpos,Stage3end_chrlength(mate),/*mate_cdna_direction*/Stage3end_cdna_direction(mate),
- pairedlength,sam_read_group_id,invertp,/*circularp*/true,/*merged_overlap_p*/false);
+ mate_chrpos,Stage3end_chrlength(mate),/*mate_sensedir*/Stage3end_sensedir(mate),
+ pairedlength,sam_read_group_id,invertp,/*circularp*/true,/*merged_overlap_p*/false,
+ Stage3end_sarrayp(this));
} else if (first_read_p == true) {
Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
+ Stage3end_cigar_tokens(this),Stage3end_gmap_intronp(this),
acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
clipdir,hardclip5_low,hardclip5_high,Shortread_fulllength(queryseq),
- Stage3end_plusp(this),Stage3end_cdna_direction(this),
+ Stage3end_plusp(this),Stage3end_sensedir(this),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,
pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,Stage3end_chrlength(this),
queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
- mate_chrpos,Stage3end_chrlength(mate),/*mate_cdna_direction*/Stage3end_cdna_direction(mate),
- pairedlength,sam_read_group_id,invertp,/*circularp*/false,/*merged_overlap_p*/false);
+ mate_chrpos,Stage3end_chrlength(mate),/*mate_sensedir*/Stage3end_sensedir(mate),
+ pairedlength,sam_read_group_id,invertp,/*circularp*/false,/*merged_overlap_p*/false,
+ Stage3end_sarrayp(this));
} else {
Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
+ Stage3end_cigar_tokens(this),Stage3end_gmap_intronp(this),
acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
clipdir,hardclip3_low,hardclip3_high,Shortread_fulllength(queryseq),
- Stage3end_plusp(this),Stage3end_cdna_direction(this),
+ Stage3end_plusp(this),Stage3end_sensedir(this),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/false,
pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,Stage3end_chrlength(this),
queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
- mate_chrpos,Stage3end_chrlength(mate),/*mate_cdna_direction*/Stage3end_cdna_direction(mate),
- pairedlength,sam_read_group_id,invertp,/*circularp*/false,/*merged_overlap_p*/false);
+ mate_chrpos,Stage3end_chrlength(mate),/*mate_sensedir*/Stage3end_sensedir(mate),
+ pairedlength,sam_read_group_id,invertp,/*circularp*/false,/*merged_overlap_p*/false,
+ Stage3end_sarrayp(this));
}
+
+ } else if (hittype == TRANSLOC_SPLICE || (hittype == SAMECHR_SPLICE && merge_samechr_p == false)) {
+ if (first_read_p == true) {
+ print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ mate_chrpos,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p);
+ } else {
+ print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
+ mate_chrpos,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p);
+ }
+
} else {
- abort();
+ querylength = Shortread_fulllength(queryseq);
+ if ((circularpos = Stage3end_circularpos(this)) > 0
+#if 0
+ && check_cigar_single(hittype,this,querylength,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos) == true &&
+ check_cigar_single(hittype,this,querylength,/*hardclip_low*/circularpos,/*hardclip_high*/0) == true
+#endif
+ ) {
+ print_substrings(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
+ chrpos,mate_chrpos,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*circularp*/true);
+ print_substrings(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
+ /*chrpos*/1,mate_chrpos,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+ resulttype,first_read_p,npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*circularp*/true);
+ } else if (first_read_p == true) {
+ print_substrings(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
+ chrpos,mate_chrpos,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*circularp*/false);
+ } else {
+ print_substrings(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths,
+ absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
+ chrpos,mate_chrpos,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
+ npaths_mate,quality_shift,sam_read_group_id,
+ invertp,invert_mate_p,/*circularp*/false);
+ }
}
return;
@@ -6253,11 +3651,11 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
void
-SAM_print_paired (Result_T result, Resulttype_T resulttype,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq1, Shortread_T queryseq2,
- bool invert_first_p, bool invert_second_p,
- bool nofailsp, bool failsonlyp, bool clip_overlap_p, bool merge_overlap_p,
- bool merge_samechr_p, int quality_shift, char *sam_read_group_id) {
+SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2,
+ Result_T result, Resulttype_T resulttype, Univ_IIT_T chromosome_iit,
+ Shortread_T queryseq1, Shortread_T queryseq2, bool invert_first_p, bool invert_second_p,
+ bool nofailsp, bool failsonlyp, bool merge_samechr_p,
+ int quality_shift, char *sam_read_group_id) {
Stage3pair_T *stage3pairarray, stage3pair;
Stage3end_T *stage3array1, *stage3array2, stage3, mate, hit5, hit3;
Chrpos_T chrpos, chrpos5, chrpos3;
@@ -6266,8 +3664,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
int hardclip5_low = 0, hardclip5_high = 0, hardclip3_low = 0, hardclip3_high = 0, clipdir;
char *acc1, *acc2;
Pairtype_T pairtype;
- FILE *fp, *fp_xs;
- char *abbrev, *abbrev_xs;
+ char *abbrev;
struct Pair_T *pairarray;
int npairs;
@@ -6284,24 +3681,20 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
return;
} else
- SAM_print_nomapping(fp_nomapping,ABBREV_NOMAPPING_1,queryseq1,/*mate*/(Stage3end_T) NULL,
+ Filestring_set_split_output(fp,OUTPUT_NM);
+ SAM_print_nomapping(fp,ABBREV_NOMAPPING_1,queryseq1,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
/*first_read_p*/true,/*npaths*/0,/*npaths_mate*/0,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_first_p,invert_second_p);
- SAM_print_nomapping(fp_nomapping,ABBREV_NOMAPPING_2,queryseq2,/*mate*/(Stage3end_T) NULL,
+ SAM_print_nomapping(fp,ABBREV_NOMAPPING_2,queryseq2,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
/*first_read_p*/false,/*npaths*/0,/*npaths_mate*/0,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_second_p,invert_first_p);
- if (failedinput_root != NULL) {
- if (fastq_format_p == true) {
- Shortread_print_query_pairedend_fastq(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2,
- invert_first_p,invert_second_p);
- } else {
- Shortread_print_query_pairedend_fasta(fp_failedinput_1,queryseq1,queryseq2,
- invert_first_p,invert_second_p);
- }
+
+ if (fp_failedinput_1 != NULL) {
+ Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2);
}
} else {
@@ -6320,29 +3713,30 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/* Don't resolve overlaps on a circular alignment */
clipdir = 0;
hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
- fp = fp_concordant_circular;
+ Filestring_set_split_output(fp,OUTPUT_CC);
abbrev = ABBREV_CONCORDANT_CIRCULAR;
} else if (clip_overlap_p == false && merge_overlap_p == false) {
clipdir = 0;
hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
- fp = fp_concordant_uniq;
+ Filestring_set_split_output(fp,OUTPUT_CU);
abbrev = ABBREV_CONCORDANT_UNIQ;
} else {
clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
- fp = fp_concordant_uniq;
+ Filestring_set_split_output(fp,OUTPUT_CU);
abbrev = ABBREV_CONCORDANT_UNIQ;
}
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
if (merge_overlap_p == false || clipdir == 0) {
/* print first end */
- SAM_print(fp,abbrev,hit5,/*mate*/hit3,acc1,acc2,/*pathnum*/1,/*npaths*/1,
+ SAM_print(fp,fp_failedinput_1,abbrev,hit5,/*mate*/hit3,
+ acc1,acc2,/*pathnum*/1,/*npaths*/1,
Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
@@ -6353,7 +3747,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
merge_samechr_p);
/* print second end */
- SAM_print(fp,abbrev,hit3,/*mate*/hit5,acc1,acc2,/*pathnum*/1,/*npaths*/1,
+ SAM_print(fp,fp_failedinput_2,abbrev,hit3,/*mate*/hit5,
+ acc1,acc2,/*pathnum*/1,/*npaths*/1,
Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
@@ -6367,8 +3762,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/* merge_overlap_p == true and overlap was found */
pairarray = Stage3pair_merge(&npairs,&querylength_merged,&queryseq_merged,&quality_merged,
stage3pair,queryseq1,queryseq2,
- /*querylength5*/Shortread_fulllength(queryseq1),
- /*querylength3*/Shortread_fulllength(queryseq2),
+ /*querylength5*/Stage3end_querylength(hit5),
+ /*querylength3*/Stage3end_querylength(hit3),
clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
/* printf("queryseq_merged: %s\n",queryseq_merged); */
if (clipdir >= 0) {
@@ -6381,11 +3776,12 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/*pathnum*/1,/*npaths*/1,/*npaths_mate*/0,
Stage3pair_absmq_score(stage3pair),first_absmq,/*invertp*/false,
/*invert_mate_p*/false);
- Pair_print_sam(fp_unpaired_uniq,/*abbrev*/ABBREV_UNPAIRED_UNIQ,pairarray,npairs,
+ Filestring_set_split_output(fp,OUTPUT_UU);
+ Pair_print_sam(fp,/*abbrev*/ABBREV_UNPAIRED_UNIQ,pairarray,npairs,/*cigar_tokens*/NULL,/*gmap_intronp*/false,
acc1,/*acc2*/NULL,Stage3end_chrnum(hit5),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
/*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
/*clipdir*/0,/*hardclip_low*/0,/*hardclip_high*/0,/*querylength*/querylength_merged,
- Stage3end_plusp(hit5),Stage3end_cdna_direction(hit5),
+ Stage3end_plusp(hit5),Stage3end_sensedir(hit5),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,
/*pathnum*/1,/*npaths*/1,
#if 0
@@ -6396,8 +3792,9 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
chrpos,Stage3end_chrlength(hit5),/*queryseq*/NULL,resulttype,flag,
/*pair_mapq_score*/MAX_QUALITY_SCORE,/*end_mapq_score*/MAX_QUALITY_SCORE,
/*mate_chrnum*/0,/*mate_effective_chrnum*/0,/*mate_chrpos*/0,/*mate_chrlength*/0,
- /*mate_cdna_direction*/0,/*pairedlength*/0,
- sam_read_group_id,/*invertp*/false,/*circularp*/false,/*merged_overlap_p*/true);
+ /*mate_sensedir*/SENSE_NULL,/*pairedlength*/0,
+ sam_read_group_id,/*invertp*/false,/*circularp*/false,/*merged_overlap_p*/true,
+ Stage3end_sarrayp(hit5));
if (quality_merged != NULL) {
FREE_OUT(quality_merged);
}
@@ -6406,25 +3803,23 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
}
} else if (resulttype == CONCORDANT_TRANSLOC) {
+ Filestring_set_split_output(fp,OUTPUT_CT);
stage3pairarray = (Stage3pair_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
if (quiet_if_excessive_p && npaths > maxpaths_report) {
- if (1 || failedinput_root != NULL) {
- /* Not able to print as input */
- /* Print as nomapping, but send to fp_concordant_transloc */
- SAM_print_nomapping(fp_concordant_transloc,ABBREV_CONCORDANT_TRANSLOC,
- queryseq1,/*mate*/(Stage3end_T) NULL,
- acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths,/*npaths_mate*/npaths,
- /*mate_chrpos*/0U,quality_shift,
- sam_read_group_id,invert_first_p,invert_second_p);
- SAM_print_nomapping(fp_concordant_transloc,ABBREV_CONCORDANT_TRANSLOC,
- queryseq2,/*mate*/(Stage3end_T) NULL,
- acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths,/*npaths_mate*/npaths,
- /*mate_chrpos*/0U,quality_shift,
- sam_read_group_id,invert_second_p,invert_first_p);
- }
+ /* Print as nomapping, but send to fp_concordant_transloc */
+ SAM_print_nomapping(fp,ABBREV_CONCORDANT_TRANSLOC,
+ queryseq1,/*mate*/(Stage3end_T) NULL,
+ acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/true,npaths,/*npaths_mate*/npaths,
+ /*mate_chrpos*/0U,quality_shift,
+ sam_read_group_id,invert_first_p,invert_second_p);
+ SAM_print_nomapping(fp,ABBREV_CONCORDANT_TRANSLOC,
+ queryseq2,/*mate*/(Stage3end_T) NULL,
+ acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/false,npaths,/*npaths_mate*/npaths,
+ /*mate_chrpos*/0U,quality_shift,
+ sam_read_group_id,invert_second_p,invert_first_p);
} else {
/* Stage3pair_eval(stage3pairarray,npaths,maxpaths_report,queryseq1,queryseq2); */
@@ -6450,11 +3845,11 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
}
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
/* print first end */
- SAM_print(fp_concordant_transloc,ABBREV_CONCORDANT_TRANSLOC,
+ SAM_print(fp,fp_failedinput_1,ABBREV_CONCORDANT_TRANSLOC,
hit5,/*mate*/hit3,acc1,acc2,pathnum,npaths,
Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
@@ -6466,7 +3861,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
merge_samechr_p);
/* print second end */
- SAM_print(fp_concordant_transloc,ABBREV_CONCORDANT_TRANSLOC,
+ SAM_print(fp,fp_failedinput_2,ABBREV_CONCORDANT_TRANSLOC,
hit3,/*mate*/hit5,acc1,acc2,pathnum,npaths,
Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
@@ -6484,30 +3879,28 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
if (quiet_if_excessive_p && npaths > maxpaths_report) {
/* Print as nomapping, but send to fp_concordant_mult_xs */
- SAM_print_nomapping(fp_concordant_mult_xs_1,ABBREV_CONCORDANT_MULT_XS,
+ Filestring_set_split_output(fp,OUTPUT_CX);
+ SAM_print_nomapping(fp,ABBREV_CONCORDANT_MULT_XS,
queryseq1,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
/*first_read_p*/true,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_first_p,invert_second_p);
- SAM_print_nomapping(fp_concordant_mult_xs_1,ABBREV_CONCORDANT_MULT_XS,
+ SAM_print_nomapping(fp,ABBREV_CONCORDANT_MULT_XS,
queryseq2,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
/*first_read_p*/false,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_second_p,invert_first_p);
- if (failedinput_root != NULL) {
- if (fastq_format_p == true) {
- Shortread_print_query_pairedend_fastq(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2,
- invert_first_p,invert_second_p);
- } else {
- Shortread_print_query_pairedend_fasta(fp_failedinput_1,queryseq1,queryseq2,
- invert_first_p,invert_second_p);
- }
+
+ if (fp_failedinput_1 != NULL) {
+ Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2);
+
}
} else {
/* Stage3pair_eval(stage3pairarray,npaths,maxpaths_report,queryseq1,queryseq2); */
+ Filestring_set_split_output(fp,OUTPUT_CM);
for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths_report; pathnum++) {
stage3pair = stage3pairarray[pathnum-1];
@@ -6529,12 +3922,12 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
}
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
if (merge_overlap_p == false || clipdir == 0) {
/* print first end */
- SAM_print(fp_concordant_mult,ABBREV_CONCORDANT_MULT,
+ SAM_print(fp,fp_failedinput_1,ABBREV_CONCORDANT_MULT,
hit5,/*mate*/hit3,acc1,acc2,pathnum,npaths,
Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
@@ -6546,7 +3939,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
merge_samechr_p);
/* print second end */
- SAM_print(fp_concordant_mult,ABBREV_CONCORDANT_MULT,
+ SAM_print(fp,fp_failedinput_2,ABBREV_CONCORDANT_MULT,
hit3,/*mate*/hit5,acc1,acc2,pathnum,npaths,
Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
@@ -6561,8 +3954,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/* merge_overlap_p == true and overlap was found */
pairarray = Stage3pair_merge(&npairs,&querylength_merged,&queryseq_merged,&quality_merged,
stage3pair,queryseq1,queryseq2,
- /*querylength5*/Shortread_fulllength(queryseq1),
- /*querylength3*/Shortread_fulllength(queryseq2),
+ /*querylength5*/Stage3end_querylength(hit5),
+ /*querylength3*/Stage3end_querylength(hit3),
clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
/* printf("queryseq_merged: %s\n",queryseq_merged); */
if (clipdir >= 0) {
@@ -6575,12 +3968,12 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/*pathnum*/1,/*npaths*/1,/*npaths_mate*/0,
Stage3pair_absmq_score(stage3pair),first_absmq,/*invertp*/false,
/*invert_mate_p*/false);
- Pair_print_sam(fp_concordant_mult,ABBREV_CONCORDANT_MULT,pairarray,npairs,
+ Pair_print_sam(fp,ABBREV_CONCORDANT_MULT,pairarray,npairs,/*cigar_tokens*/NULL,/*gmap_intronp*/false,
acc1,/*acc2*/NULL,Stage3end_chrnum(hit5),chromosome_iit,
/*usersegment*/(Sequence_T) NULL,
/*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
/*clipdir*/0,/*hardclip_low*/0,/*hardclip_high*/0,/*querylength*/querylength_merged,
- Stage3end_plusp(hit5),Stage3end_cdna_direction(hit5),
+ Stage3end_plusp(hit5),Stage3end_sensedir(hit5),
/*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,pathnum,npaths,
#if 0
Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
@@ -6590,8 +3983,9 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
chrpos,Stage3end_chrlength(hit5),/*queryseq*/NULL,resulttype,flag,
/*pair_mapq_score*/MAX_QUALITY_SCORE,/*end_mapq_score*/MAX_QUALITY_SCORE,
/*mate_chrnum*/0,/*mate_effective_chrnum*/0,/*mate_chrpos*/0,/*mate_chrlength*/0,
- /*mate_cdna_direction*/0,/*pairedlength*/0,
- sam_read_group_id,/*invertp*/false,/*circularp*/false,/*merged_overlap_p*/true);
+ /*mate_sensedir*/SENSE_NULL,/*pairedlength*/0,
+ sam_read_group_id,/*invertp*/false,/*circularp*/false,/*merged_overlap_p*/true,
+ Stage3end_sarrayp(hit5));
if (quality_merged != NULL) {
FREE_OUT(quality_merged);
}
@@ -6607,16 +4001,16 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
stage3pair = stage3pairarray[0];
if (Stage3pair_circularp(stage3pair) == true) {
- fp = fp_paired_uniq_circular;
+ Filestring_set_split_output(fp,OUTPUT_PC);
abbrev = ABBREV_PAIRED_UNIQ_CIRCULAR;
} else if ((pairtype = Stage3pair_pairtype(stage3pair)) == PAIRED_INVERSION) {
- fp = fp_paired_uniq_inv;
+ Filestring_set_split_output(fp,OUTPUT_PI);
abbrev = ABBREV_PAIRED_UNIQ_INV;
} else if (pairtype == PAIRED_SCRAMBLE) {
- fp = fp_paired_uniq_scr;
+ Filestring_set_split_output(fp,OUTPUT_PS);
abbrev = ABBREV_PAIRED_UNIQ_SCR;
} else if (pairtype == PAIRED_TOOLONG) {
- fp = fp_paired_uniq_long;
+ Filestring_set_split_output(fp,OUTPUT_PL);
abbrev = ABBREV_PAIRED_UNIQ_LONG;
} else {
fprintf(stderr,"Unexpected pairtype %d\n",pairtype);
@@ -6627,11 +4021,12 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
hit5 = Stage3pair_hit5(stage3pair);
hit3 = Stage3pair_hit3(stage3pair);
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
/* print first end */
- SAM_print(fp,abbrev,hit5,/*mate*/hit3,acc1,acc2,/*pathnum*/1,/*npaths*/1,
+ SAM_print(fp,fp_failedinput_1,abbrev,hit5,/*mate*/hit3,
+ acc1,acc2,/*pathnum*/1,/*npaths*/1,
Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
@@ -6642,7 +4037,8 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
merge_samechr_p);
/* print second end */
- SAM_print(fp,abbrev,hit3,/*mate*/hit5,acc1,acc2,/*pathnum*/1,/*npaths*/1,
+ SAM_print(fp,fp_failedinput_2,abbrev,hit3,/*mate*/hit5,
+ acc1,acc2,/*pathnum*/1,/*npaths*/1,
Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
@@ -6656,32 +4052,28 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
stage3pairarray = (Stage3pair_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
if (quiet_if_excessive_p && npaths > maxpaths_report) {
- /* Print as nomapping, but send to fp_concordant_mult */
- SAM_print_nomapping(fp_paired_mult_xs_1,ABBREV_PAIRED_MULT_XS,
+ /* Print as nomapping, but send to fp_paired_mult */
+ Filestring_set_split_output(fp,OUTPUT_PX);
+ SAM_print_nomapping(fp,ABBREV_PAIRED_MULT_XS,
queryseq1,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
/*first_read_p*/true,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_first_p,invert_second_p);
- SAM_print_nomapping(fp_paired_mult_xs_1,ABBREV_PAIRED_MULT_XS,
+ SAM_print_nomapping(fp,ABBREV_PAIRED_MULT_XS,
queryseq2,/*mate*/(Stage3end_T) NULL,
acc1,acc2,chromosome_iit,resulttype,
/*first_read_p*/false,npaths,/*npaths_mate*/npaths,
/*mate_chrpos*/0U,quality_shift,
sam_read_group_id,invert_second_p,invert_first_p);
- if (failedinput_root != NULL) {
- if (fastq_format_p == true) {
- Shortread_print_query_pairedend_fastq(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2,
- invert_first_p,invert_second_p);
- } else {
- Shortread_print_query_pairedend_fasta(fp_failedinput_1,queryseq1,queryseq2,
- invert_first_p,invert_second_p);
- }
+ if (fp_failedinput_1 != NULL) {
+ Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2);
}
} else {
/* Stage3pair_eval(stage3pairarray,npaths,maxpaths_report,queryseq1,queryseq2); */
+ Filestring_set_split_output(fp,OUTPUT_PM);
for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths_report; pathnum++) {
stage3pair = stage3pairarray[pathnum-1];
@@ -6689,11 +4081,11 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
hit5 = Stage3pair_hit5(stage3pair);
hit3 = Stage3pair_hit3(stage3pair);
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
/* print first end */
- SAM_print(fp_paired_mult,ABBREV_PAIRED_MULT,
+ SAM_print(fp,fp_failedinput_1,ABBREV_PAIRED_MULT,
hit5,/*mate*/hit3,acc1,acc2,pathnum,npaths,
Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
@@ -6705,7 +4097,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
merge_samechr_p);
/* print second end */
- SAM_print(fp_paired_mult,ABBREV_PAIRED_MULT,
+ SAM_print(fp,fp_failedinput_2,ABBREV_PAIRED_MULT,
hit3,/*mate*/hit5,acc1,acc2,pathnum,npaths,
Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
Stage3pair_mapq_score(stage3pair),chromosome_iit,
@@ -6727,20 +4119,20 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
hit5 = stage3array1[0];
hit3 = stage3array2[0];
- chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1));
- chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2));
+ chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+ chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
if (Stage3end_circularpos(hit5) > 0 || Stage3end_circularpos(hit3) > 0) {
- fp = fp_unpaired_circular;
+ Filestring_set_split_output(fp,OUTPUT_UC);
abbrev = ABBREV_UNPAIRED_CIRCULAR;
} else {
- fp = fp_unpaired_uniq;
+ Filestring_set_split_output(fp,OUTPUT_UU);
abbrev = ABBREV_UNPAIRED_UNIQ;
}
/* print first end */
/* Stage3end_eval_and_sort(stage3array1,npaths1,maxpaths_report,queryseq1); */
- SAM_print(fp,abbrev,hit5,/*mate*/hit3,acc1,acc2,/*pathnum*/1,/*npaths*/1,
+ SAM_print(fp,fp_failedinput_1,abbrev,hit5,/*mate*/hit3,acc1,acc2,/*pathnum*/1,/*npaths*/1,
Stage3end_absmq_score(stage3array1[0]),first_absmq1,/*second_absmq*/0,
Stage3end_mapq_score(stage3array1[0]),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
@@ -6751,7 +4143,7 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
/* print second end */
/* Stage3end_eval_and_sort(stage3array2,npaths2,maxpaths_report,queryseq2); */
- SAM_print(fp,abbrev,hit3,/*mate*/hit5,acc1,acc2,/*pathnum*/1,/*npaths*/1,
+ SAM_print(fp,fp_failedinput_2,abbrev,hit3,/*mate*/hit5,acc1,acc2,/*pathnum*/1,/*npaths*/1,
Stage3end_absmq_score(stage3array2[0]),first_absmq2,/*second_absmq*/0,
Stage3end_mapq_score(stage3array2[0]),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
@@ -6762,13 +4154,15 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else if (resulttype == UNPAIRED_MULT || resulttype == UNPAIRED_TRANSLOC) {
if (resulttype == UNPAIRED_MULT) {
- fp = fp_unpaired_mult;
- fp_xs = fp_unpaired_mult_xs_1;
+ if (quiet_if_excessive_p && npaths1 > maxpaths_report && npaths2 > maxpaths_report) {
+ Filestring_set_split_output(fp,OUTPUT_UX);
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_UM);
+ }
abbrev = ABBREV_UNPAIRED_MULT;
- abbrev_xs = ABBREV_UNPAIRED_MULT_XS;
} else {
- fp = fp_xs = fp_unpaired_transloc;
- abbrev = abbrev_xs = ABBREV_UNPAIRED_TRANSLOC;
+ Filestring_set_split_output(fp,OUTPUT_UT);
+ abbrev = ABBREV_UNPAIRED_TRANSLOC;
}
stage3array1 = (Stage3end_T *) Result_array(&npaths1,&first_absmq1,&second_absmq1,result);
@@ -6805,15 +4199,15 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
mate = stage3array2[0];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq2),/*first_read_p*/false);
}
if (npaths1 == 1) {
stage3 = stage3array1[0];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
- SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
@@ -6823,21 +4217,20 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
invert_first_p,invert_second_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
- if (1 || failedinput_root != NULL) {
- /* Just printing one end as nomapping */
- SAM_print_nomapping(fp_xs,abbrev_xs,queryseq1,mate,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
- /*mate_chrpos*/chrpos3,
- quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
- }
+ /* Just printing one end as nomapping */
+ SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
+ /*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
} else {
for (pathnum = 1; pathnum <= npaths1 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array1[pathnum-1];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,Shortread_fulllength(queryseq1),
+ /*first_read_p*/true);
- SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
@@ -6858,15 +4251,17 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
mate = stage3array1[0];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq1),
+ /*first_read_p*/true);
}
if (npaths2 == 1) {
stage3 = stage3array2[0];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
+ /*first_read_p*/false);
- SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
@@ -6876,21 +4271,20 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
invert_second_p,invert_first_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
- if (1 || failedinput_root != NULL) {
- /* Just printing one end as nomapping */
- SAM_print_nomapping(fp_xs,abbrev_xs,queryseq2,mate,acc1,acc2,chromosome_iit,
- resulttype,/*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
- /*mate_chrpos*/chrpos5,
- quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
- }
+ /* Just printing one end as nomapping */
+ SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,
+ resulttype,/*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
+ /*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
} else {
for (pathnum = 1; pathnum <= npaths2 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array2[pathnum-1];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
+ /*first_read_p*/false);
- SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
@@ -6907,23 +4301,26 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
if (resulttype == HALFMAPPING_UNIQ) {
if (npaths1 == 1 && Stage3end_circularpos(stage3array1[0]) > 0) {
- fp = fp_xs = fp_halfmapping_circular;
- abbrev = abbrev_xs = ABBREV_HALFMAPPING_CIRCULAR;
+ Filestring_set_split_output(fp,OUTPUT_HC);
+ abbrev = ABBREV_HALFMAPPING_CIRCULAR;
} else if (npaths2 == 1 && Stage3end_circularpos(stage3array2[0]) > 0) {
- fp = fp_xs = fp_halfmapping_circular;
- abbrev = abbrev_xs = ABBREV_HALFMAPPING_CIRCULAR;
+ Filestring_set_split_output(fp,OUTPUT_HC);
+ abbrev = ABBREV_HALFMAPPING_CIRCULAR;
} else {
- fp = fp_xs = fp_halfmapping_uniq;
- abbrev = abbrev_xs = ABBREV_HALFMAPPING_UNIQ;
+ Filestring_set_split_output(fp,OUTPUT_HU);
+ abbrev = ABBREV_HALFMAPPING_UNIQ;
}
} else if (resulttype == HALFMAPPING_TRANSLOC) {
- fp = fp_xs = fp_halfmapping_transloc;
- abbrev = abbrev_xs = ABBREV_HALFMAPPING_TRANSLOC;
+ Filestring_set_split_output(fp,OUTPUT_HT);
+ abbrev = ABBREV_HALFMAPPING_TRANSLOC;
} else if (resulttype == HALFMAPPING_MULT) {
- fp = fp_halfmapping_mult;
- fp_xs = fp_halfmapping_mult_xs_1;
- abbrev = ABBREV_HALFMAPPING_MULT;
- abbrev_xs = ABBREV_HALFMAPPING_MULT_XS;
+ if (quiet_if_excessive_p == true && npaths1 > maxpaths_report && npaths2 > maxpaths_report) {
+ Filestring_set_split_output(fp,OUTPUT_HX);
+ abbrev = ABBREV_HALFMAPPING_MULT_XS;
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_HM);
+ abbrev = ABBREV_HALFMAPPING_MULT;
+ }
} else {
abort();
}
@@ -6964,27 +4361,27 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
mate = stage3array2[0];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq2),
+ /*first_read_p*/false);
}
if (npaths1 == 0) {
- if (1 || failedinput_root != NULL) {
- /* just printing one end as nomapping */
- /* mate should be non-NULL here */
- SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
- /*mate_chrpos*/chrpos3,
- quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
- }
+ /* just printing one end as nomapping */
+ /* mate should be non-NULL here */
+ SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
+ /*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
} else if (npaths1 == 1) {
/* mate should be NULL here */
stage3 = stage3array1[0];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
+ /*first_read_p*/true);
- SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1,
Stage3end_absmq_score(stage3),first_absmq1,/*second_absmq1*/0,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
@@ -6994,23 +4391,22 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
invert_first_p,invert_second_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
- if (1 || failedinput_root != NULL) {
- /* Just printing one end as nomapping */
- /* mate should be NULL here */
- SAM_print_nomapping(fp_xs,abbrev_xs,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
- /*mate_chrpos*/chrpos3,
- quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
- }
+ /* Just printing one end as nomapping */
+ /* mate should be NULL here */
+ SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/true,npaths1,/*npaths_mate*/npaths2,
+ /*mate_chrpos*/chrpos3,
+ quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
} else {
/* mate should be NULL here */
for (pathnum = 1; pathnum <= npaths1 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array1[pathnum-1];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
+ /*first_read_p*/true);
- SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
+ SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1,
Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
@@ -7031,27 +4427,27 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
} else {
mate = stage3array1[0];
hardclip5_low = hardclip5_high = 0;
- chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq1));
+ chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq1),
+ /*first_read_p*/true);
}
if (npaths2 == 0) {
- if (1 || failedinput_root != NULL) {
- /* Just printing one end as nomapping */
- /* mate should be non-NULL here */
- SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
- /*mate_chrpos*/chrpos5,
- quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
- }
+ /* Just printing one end as nomapping */
+ /* mate should be non-NULL here */
+ SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
+ /*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
} else if (npaths2 == 1) {
/* mate should be NULL here */
stage3 = stage3array2[0];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
+ /*first_read_p*/false);
- SAM_print(fp,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
@@ -7061,23 +4457,22 @@ SAM_print_paired (Result_T result, Resulttype_T resulttype,
invert_second_p,invert_first_p,merge_samechr_p);
} else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
- if (1 || failedinput_root != NULL) {
- /* Just printing one end as nomapping */
- /* mate should be NULL here */
- SAM_print_nomapping(fp_xs,abbrev_xs,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
- /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
- /*mate_chrpos*/chrpos5,
- quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
- }
+ /* Just printing one end as nomapping */
+ /* mate should be NULL here */
+ SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
+ /*first_read_p*/false,npaths2,/*npaths_mate*/npaths1,
+ /*mate_chrpos*/chrpos5,
+ quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
} else {
/* mate should be NULL here */
for (pathnum = 1; pathnum <= npaths2 && pathnum <= maxpaths_report; pathnum++) {
stage3 = stage3array2[pathnum-1];
hardclip3_low = hardclip3_high = 0;
- chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2));
+ chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
+ /*first_read_p*/false);
- SAM_print(fp,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
+ SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2,
Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
Stage3end_mapq_score(stage3),chromosome_iit,
/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
diff --git a/src/samprint.h b/src/samprint.h
index 9d1dc8b..b237dd4 100644
--- a/src/samprint.h
+++ b/src/samprint.h
@@ -1,52 +1,36 @@
-/* $Id: samprint.h 160877 2015-03-13 00:31:23Z twu $ */
+/* $Id: samprint.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef SAMPRINT_INCLUDED
#define SAMPRINT_INCLUDED
#include <stdio.h>
-#include "stage3hr.h"
#include "iit-read-univ.h"
#include "iit-read.h"
-#include "shortread.h"
-#include "resulthr.h"
#include "genomicpos.h"
#include "types.h"
#include "substring.h"
#include "bool.h"
#include "intlist.h"
+#include "filestring.h"
-extern void
-SAM_setup (bool quiet_if_excessive_p_in, int maxpaths_report_in,
- char *failedinput_root_in, bool fastq_format_p_in, bool hide_soft_clips_p_in,
- bool sam_multiple_primaries_p_in,
- bool force_xs_direction_p_in, bool md_lowercase_variant_p_in, IIT_T snps_iit_in);
-
-extern void
-SAM_file_setup_single (FILE *fp_failedinput_in, FILE *fp_nomapping_in,
- FILE *fp_unpaired_uniq_in, FILE *fp_unpaired_circular_in, FILE *fp_unpaired_transloc_in,
- FILE *fp_unpaired_mult_in, FILE *fp_unpaired_mult_xs_1_in);
+#ifdef GSNAP
+#include "shortread.h"
+#include "stage3hr.h"
+#include "resulthr.h"
+#include "genome.h"
+#endif
-extern void
-SAM_file_setup_paired (FILE *failedinput_1_in, FILE *failedinput_2_in, FILE *fp_nomapping_in,
- FILE *fp_halfmapping_uniq_in, FILE *fp_halfmapping_circular_in, FILE *fp_halfmapping_transloc_in,
- FILE *fp_halfmapping_mult_in, FILE *fp_halfmapping_mult_xs_1_in, FILE *fp_halfmapping_mult_xs_2_in,
- FILE *fp_paired_uniq_circular_in, FILE *fp_paired_uniq_inv_in, FILE *fp_paired_uniq_scr_in,
- FILE *fp_paired_uniq_long_in, FILE *fp_paired_mult_in, FILE *fp_paired_mult_xs_1_in, FILE *fp_paired_mult_xs_2_in,
- FILE *fp_concordant_uniq_in, FILE *fp_concordant_circular_in, FILE *fp_concordant_transloc_in,
- FILE *fp_concordant_mult_in, FILE *fp_concordant_mult_xs_1_in, FILE *fp_concordant_mult_xs_2_in);
+#ifdef GSNAP
extern void
-SAM_file_setup_all (FILE *failedinput_1_in, FILE *failedinput_2_in, FILE *fp_nomapping_in,
- FILE *fp_unpaired_uniq_in, FILE *fp_unpaired_circular_in, FILE *fp_unpaired_transloc_in,
- FILE *fp_unpaired_mult_in, FILE *fp_unpaired_mult_xs_1_in, FILE *fp_unpaired_mult_xs_2_in,
- FILE *fp_halfmapping_uniq_in, FILE *fp_halfmapping_circular_in, FILE *fp_halfmapping_transloc_in,
- FILE *fp_halfmapping_mult_in, FILE *fp_halfmapping_mult_xs_1_in, FILE *fp_halfmapping_mult_xs_2_in,
- FILE *fp_paired_uniq_circular_in, FILE *fp_paired_uniq_inv_in, FILE *fp_paired_uniq_scr_in,
- FILE *fp_paired_uniq_long_in, FILE *fp_paired_mult_in, FILE *fp_paired_mult_xs_1_in, FILE *fp_paired_mult_xs_2_in,
- FILE *fp_concordant_uniq_in, FILE *fp_concordant_circular_in, FILE *fp_concordant_transloc_in,
- FILE *fp_concordant_mult_in, FILE *fp_concordant_mult_xs_1_in, FILE *fp_concordant_mult_xs_2_in);
+SAM_setup (bool quiet_if_excessive_p_in, int maxpaths_report_in,
+ char *failedinput_root_in, bool fastq_format_p_in, bool hide_soft_clips_p_in,
+ bool clip_overlap_p_in, bool merge_overlap_p_in, bool sam_multiple_primaries_p_in,
+ bool force_xs_direction_p_in, bool md_lowercase_variant_p_in, IIT_T snps_iit_in,
+ Univ_IIT_T chromosome_iit_in, Genome_T genome_in);
extern Chrpos_T
-SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, int querylength);
+SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, int querylength,
+ bool first_read_p);
extern unsigned int
SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
@@ -54,14 +38,14 @@ SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
int absmq_score, int first_absmq, bool invertp, bool invert_mate_p);
extern void
-SAM_print_nomapping (FILE *fp, char *abbrev, Shortread_T queryseq, Stage3end_T mate, char *acc1, char *acc2,
+SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3end_T mate, char *acc1, char *acc2,
Univ_IIT_T chromosome_iit, Resulttype_T resulttype, bool first_read_p,
int npaths, int npaths_mate, Chrpos_T mate_chrpos,
int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p);
extern void
-SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
- char *acc1, char *acc2, int pathnum, int npaths,
+SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
+ Stage3end_T this, Stage3end_T mate, char *acc1, char *acc2, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
Shortread_T queryseq2, int pairedlength, Chrpos_T chrpos, Chrpos_T mate_chrpos,
int clipdir, int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high,
@@ -69,11 +53,12 @@ SAM_print (FILE *fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
char *sam_read_group_id, bool invertp, bool invert_mate_p, bool merge_samechr_p);
extern void
-SAM_print_paired (Result_T result, Resulttype_T resulttype,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq1, Shortread_T queryseq2,
- bool invert_first_p, bool invert_second_p,
- bool nofailsp, bool failsonlyp, bool clip_overlap_p, bool merge_overlap_p,
- bool merge_samechr_p, int quality_shift, char *sam_read_group_id);
+SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2,
+ Result_T result, Resulttype_T resulttype, Univ_IIT_T chromosome_iit,
+ Shortread_T queryseq1, Shortread_T queryseq2, bool invert_first_p, bool invert_second_p,
+ bool nofailsp, bool failsonlyp, bool merge_samechr_p,
+ int quality_shift, char *sam_read_group_id);
+#endif
#endif
diff --git a/src/sarray-read.c b/src/sarray-read.c
index c8d3e60..5582b1a 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 155505 2014-12-16 22:23:18Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 166828 2015-06-03 06:56:12Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -12,6 +12,7 @@ static char rcsid[] = "$Id: sarray-read.c 155505 2014-12-16 22:23:18Z twu $";
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h> /* For munmap */
+
#include "mem.h"
#include "bool.h"
#include "assert.h"
@@ -28,6 +29,24 @@ static char rcsid[] = "$Id: sarray-read.c 155505 2014-12-16 22:23:18Z twu $";
#include "bitpack64-readtwo.h"
#include "bitpack64-access.h"
+#include "comp.h"
+#include "diagdef.h"
+#include "diag.h"
+#include "univdiagdef.h"
+#include "univdiag.h"
+#include "substring.h"
+#include "junction.h"
+#include "stage3hr.h"
+
+#ifdef USE_CSA
+/* For FREAD_UINT */
+#ifdef WORDS_BIGENDIAN
+#include "bigendian.h"
+#else
+#include "littleendian.h"
+#endif
+#endif
+
#ifdef HAVE_SSE2
#include <emmintrin.h>
@@ -42,6 +61,13 @@ static char rcsid[] = "$Id: sarray-read.c 155505 2014-12-16 22:23:18Z twu $";
#endif
+#define MIN_ENDLENGTH 12
+#define MIN_INTRONLEN 9
+
+#define MAX_HITS_FOR_BEST_ELT 1000
+
+/* #define USE_CSA 1 */
+
/* A value of 10000 misses various splices, although they are caught by GSNAP algorithm */
#define EXCESS_SARRAY_HITS 100000
#define LOCALSPLICING_NMATCHES_SLOP 1
@@ -60,6 +86,8 @@ static char rcsid[] = "$Id: sarray-read.c 155505 2014-12-16 22:23:18Z twu $";
#define debug(x)
#endif
+#define MAX_DEBUG1_HITS 100
+
/* Details of suffix array search */
#ifdef DEBUG1
#define debug1(x) x
@@ -81,6 +109,26 @@ static char rcsid[] = "$Id: sarray-read.c 155505 2014-12-16 22:23:18Z twu $";
#define debug2(x)
#endif
+/* Compressed suffix array */
+#ifdef DEBUG3
+#define debug3(x) x
+#else
+#define debug3(x)
+#endif
+
+/* Compressed suffix array: comparison with sarray */
+#ifdef DEBUG3A
+#define debug3a(x) x
+#else
+#define debug3a(x)
+#endif
+
+/* Compressed suffix array: comparison with csa phi */
+#ifdef DEBUG3B
+#define debug3b(x) x
+#else
+#define debug3b(x)
+#endif
/* known splicing */
#ifdef DEBUG4S
@@ -125,7 +173,21 @@ static char rcsid[] = "$Id: sarray-read.c 155505 2014-12-16 22:23:18Z twu $";
#define debug10(x)
#endif
-/* Compare sarray_search with sarray_search_simple */
+/* Sorting of diagonals */
+#ifdef DEBUG12
+#define debug12(x) x
+#else
+#define debug12(x)
+#endif
+
+/* GMAP */
+#ifdef DEBUG13
+#define debug13(x) x
+#else
+#define debug13(x)
+#endif
+
+/* Oligoindex fillin */
#ifdef DEBUG14
#define debug14(x) x
#else
@@ -167,15 +229,36 @@ struct T {
Univcoord_T n;
Univcoord_T n_plus_one;
+ /* Old format */
+ int array_shmid;
Univcoord_T *array;
+#ifdef USE_CSA
+#ifdef DEBUG3B
+ Univcoord_T *csa;
+#endif
+
+ /* New format */
+ int sa_sampling;
+ Univcoord_T *array_samples;
+ int csaAptrs_shmid, csaAcomp_shmid, csaCptrs_shmid, csaCcomp_shmid,
+ csaGptrs_shmid, csaGcomp_shmid, csaTptrs_shmid, csaTcomp_shmid, csaXptrs_shmid, csaXcomp_shmid;
+ UINT4 *csaAptrs, *csaAcomp, *csaCptrs, *csaCcomp, *csaGptrs, *csaGcomp, *csaTptrs, *csaTcomp, *csaXptrs, *csaXcomp;
+ UINT4 *csa0ptrs[16], *csa0comp[16];
+#endif
+
+ int lcpchilddc_shmid;
unsigned char *lcpchilddc;
+ int lcp_guide_shmid;
+ int lcp_exceptions_shmid;
UINT4 *lcp_guide;
UINT4 *lcp_exceptions;
int n_lcp_exceptions; /* Won't be necessary if we change lcpchilddc to use guide array */
/* int lcp_guide_interval; -- Always use 1024 */
+ int child_guide_shmid;
+ int child_exceptions_shmid;
UINT4 *child_guide;
UINT4 *child_exceptions;
/* int n_child_exceptions; */
@@ -185,6 +268,17 @@ struct T {
Sarrayptr_T initindexi[4]; /* For A, C, G, T */
Sarrayptr_T initindexj[4]; /* For A, C, G, T */
#endif
+#ifdef USE_CSA
+ Sarrayptr_T indexA;
+ Sarrayptr_T indexC;
+ Sarrayptr_T indexG;
+ Sarrayptr_T indexT;
+ Sarrayptr_T indexX;
+#ifdef HAVE_SSE2
+ __m128i indices0;
+ UINT4 index0[16];
+#endif
+#endif
int indexsize;
UINT4 indexspace; /* 4^indexsize. Used by sarray_search to detect when we have a poly-T oligo shorter than indexsize */
@@ -194,13 +288,25 @@ struct T {
#elif defined(USE_SEPARATE_BUCKETS)
UINT4 *indexi_ptrs, *indexi_comp, *indexj_ptrs, *indexj_comp; /* bucket array: oligomer lookup into suffix array */
#else
+ int indexij_ptrs_shmid;
+ int indexij_comp_shmid;
UINT4 *indexij_ptrs, *indexij_comp;
#endif
Access_T sarray_access;
- Access_T aux_access;
+ Access_T lcp_access;
+ Access_T guideexc_access;
+ Access_T indexij_access;
int array_fd; size_t array_len;
+#ifdef USE_CSA
+ int csaAptrs_fd; size_t csaAptrs_len; int csaAcomp_fd; size_t csaAcomp_len;
+ int csaCptrs_fd; size_t csaCptrs_len; int csaCcomp_fd; size_t csaCcomp_len;
+ int csaGptrs_fd; size_t csaGptrs_len; int csaGcomp_fd; size_t csaGcomp_len;
+ int csaTptrs_fd; size_t csaTptrs_len; int csaTcomp_fd; size_t csaTcomp_len;
+ int csaXptrs_fd; size_t csaXptrs_len; int csaXcomp_fd; size_t csaXcomp_len;
+#endif
+
#ifdef DEBUG15
int indexi_ptrs_fd; size_t indexi_ptrs_len; int indexi_comp_fd; size_t indexi_comp_len;
int indexj_ptrs_fd; size_t indexj_ptrs_len; int indexj_comp_fd; size_t indexj_comp_len;
@@ -254,6 +360,10 @@ static Chrpos_T *splicedists;
static int nsplicesites;
+#ifdef HAVE_SSE2
+static __m128i epi32_convert; /* For converting unsigned ints to signed ints */
+#endif
+
#if defined(HAVE_SSE2) && defined(USE_SHUFFLE_MASK)
static __m128i shuffle_mask16[16];
#endif
@@ -386,8 +496,12 @@ Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
printf("T => %u %u\n",sarray->initindexi[3],sarray->initindexj[3]);
#endif
+#ifdef HAVE_SSE2
+ epi32_convert = _mm_set1_epi32(2147483648); /* 2^31 */
+#endif
+
#if defined(HAVE_SSE2) && defined(USE_SHUFFLE_MASK)
- /* Used by Elt_fill_positions_filtered */
+ /* Used by fill_positions_filtered_first */
shuffle_mask16[0] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1);
shuffle_mask16[1] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 3, 2, 1, 0);
shuffle_mask16[2] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 7, 6, 5, 4);
@@ -435,16 +549,184 @@ power (int base, int exponent) {
}
+/* Builds the pathnames of the suffix-array component files for one
+   genome index and passes each of them to Access_shmem_remove
+   (presumably to release the shared-memory segment keyed by that file;
+   confirm against access.c).
+
+     dir, fileroot: combined as <dir>/<fileroot><mode_prefix><suffix>
+     snps_root:     not used by this function
+     mode, fwdp:    select mode_prefix: "." for STANDARD; ".metct." or
+                    ".metga." for the CMET modes; ".a2iag." or ".a2itc."
+                    for the ATOI modes (first name when fwdp is true)
+
+   Every pathname allocated here is freed before returning.  Aborts if
+   mode is not one of the values listed above, because mode_prefix
+   would otherwise be used uninitialized. */
+void
+Sarray_shmem_remove (char *dir, char *fileroot, char *snps_root, Mode_T mode, bool fwdp) {
+  char *mode_prefix;
+  char *sarrayfile;
+  char *lcpchilddcfile;
+  char *lcp_guidefile, *lcp_exceptionsfile;
+  char *child_guidefile, *child_exceptionsfile;
+  char *indexij_ptrsfile, *indexij_compfile;
+
+  if (mode == STANDARD) {
+    mode_prefix = ".";
+  } else if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
+    if (fwdp == true) {
+      mode_prefix = ".metct.";
+    } else {
+      mode_prefix = ".metga.";
+    }
+  } else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
+    if (fwdp == true) {
+      mode_prefix = ".a2iag.";
+    } else {
+      mode_prefix = ".a2itc.";
+    }
+  } else {
+    /* Without this branch, mode_prefix would be read uninitialized below */
+    fprintf(stderr,"Sarray_shmem_remove: unrecognized mode %d\n",(int) mode);
+    abort();
+  }
+
+  sarrayfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sarray")+1,sizeof(char));
+  sprintf(sarrayfile,"%s/%s%ssarray",dir,fileroot,mode_prefix);
+
+  lcpchilddcfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpchilddc")+1,sizeof(char));
+  sprintf(lcpchilddcfile,"%s/%s%ssalcpchilddc",dir,fileroot,mode_prefix);
+
+  lcp_guidefile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpguide1024")+1,sizeof(char));
+  sprintf(lcp_guidefile,"%s/%s%ssalcpguide1024",dir,fileroot,mode_prefix);
+  lcp_exceptionsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpexc")+1,sizeof(char));
+  sprintf(lcp_exceptionsfile,"%s/%s%ssalcpexc",dir,fileroot,mode_prefix);
+
+  child_guidefile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sachildguide1024")+1,sizeof(char));
+  sprintf(child_guidefile,"%s/%s%ssachildguide1024",dir,fileroot,mode_prefix);
+  child_exceptionsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sachildexc")+1,sizeof(char));
+  sprintf(child_exceptionsfile,"%s/%s%ssachildexc",dir,fileroot,mode_prefix);
+
+  indexij_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("saindex64meta")+1,sizeof(char));
+  sprintf(indexij_ptrsfile,"%s/%s%ssaindex64meta",dir,fileroot,mode_prefix);
+  indexij_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("saindex64strm")+1,sizeof(char));
+  sprintf(indexij_compfile,"%s/%s%ssaindex64strm",dir,fileroot,mode_prefix);
+
+  Access_shmem_remove(indexij_ptrsfile);
+  Access_shmem_remove(indexij_compfile);
+
+  Access_shmem_remove(sarrayfile);
+  Access_shmem_remove(lcpchilddcfile);
+  Access_shmem_remove(lcp_guidefile);
+  Access_shmem_remove(lcp_exceptionsfile);
+
+  Access_shmem_remove(child_guidefile);
+  Access_shmem_remove(child_exceptionsfile);
+
+  /* Release every pathname, including the two indexij names */
+  FREE(indexij_compfile);
+  FREE(indexij_ptrsfile);
+
+  FREE(child_exceptionsfile);
+  FREE(child_guidefile);
+
+  FREE(lcp_exceptionsfile);
+  FREE(lcp_guidefile);
+
+  FREE(lcpchilddcfile);
+
+  FREE(sarrayfile);
+
+  return;
+}
+
+
+#ifdef USE_CSA
+
+/* Returns the suffix array value for index i.
+
+   If sarray->array is non-NULL, the value is read directly from it
+   (this shortcut is compiled out when DEBUG3A is defined).  Otherwise,
+   while i is not a multiple of sarray->sa_sampling, i is replaced by
+   the value that Bitpack64_read_one returns from one of the five
+   per-character csa arrays (A, C, G, T, X), counting one hop per
+   replacement.  The result is the sampled value
+   array_samples[i/sa_sampling] minus the number of hops taken.
+
+   The csa array for a hop is the one whose starting index (indexA,
+   indexC, indexG, indexT, indexX) is the largest that does not exceed
+   i.  The SSE2 path finds it with a single vector comparison against
+   sarray->indices0; the resulting bit mask (0, 1, 3, 7 or 15) indexes
+   sarray->index0, sarray->csa0ptrs and sarray->csa0comp.  The non-SSE2
+   path uses a chain of comparisons instead. */
+static Univcoord_T
+csa_lookup (T sarray, Sarrayptr_T i) {
+  Univcoord_T nhops = 0;
+#ifdef DEBUG3A
+  Univcoord_T expected_sa_i;	/* Reference value from the uncompressed array */
+#endif
+#ifdef DEBUG3B
+  Sarrayptr_T expected_i;	/* Reference successor from sarray->csa */
+#endif
+#ifdef HAVE_SSE2
+  /* Needed only by the vectorized lookup; __m128i is not available otherwise */
+  __m128i converted, cmp;
+  int matchbits;
+#endif
+
+  debug3(printf("Entered csa_lookup for %u:",i));
+#ifdef DEBUG3A
+  expected_sa_i = sarray->array[i];
+#endif
+
+  if (
+#ifdef DEBUG3A
+      0 &&
+#endif
+      sarray->array != NULL) {
+    debug3(printf("Returning %u\n",sarray->array[i]));
+    return sarray->array[i];
+  } else {
+    while ((i % sarray->sa_sampling) != 0) {
+      debug3(printf(",%u",i));
+#ifdef DEBUG3B
+      expected_i = sarray->csa[i];
+#endif
+
+#ifdef HAVE_SSE2
+      converted = _mm_sub_epi32(_mm_set1_epi32(i),epi32_convert);
+      cmp = _mm_cmpgt_epi32(converted,sarray->indices0); /* To use cmpgt, sarray->indices0 is shifted down by 1 */
+      matchbits = _mm_movemask_ps(_mm_castsi128_ps(cmp));
+      /* assert(matchbits == 0 || matchbits == 1 || matchbits == 3 || matchbits == 7 || matchbits == 15); */
+      debug3(printf("(%d)",matchbits));
+      i = Bitpack64_read_one(i - sarray->index0[matchbits],sarray->csa0ptrs[matchbits],sarray->csa0comp[matchbits]);
+#else
+      /* Scalar path: matchbits does not exist here, so there is nothing
+         to assert on, and the per-character trace is debug-only output */
+      if (i >= sarray->indexX) {
+        debug3(printf("X"));
+        i = Bitpack64_read_one(i-sarray->indexX,sarray->csaXptrs,sarray->csaXcomp);
+      } else if (i >= sarray->indexT) {
+        debug3(printf("T"));
+        i = Bitpack64_read_one(i-sarray->indexT,sarray->csaTptrs,sarray->csaTcomp);
+      } else if (i >= sarray->indexG) {
+        debug3(printf("G"));
+        i = Bitpack64_read_one(i-sarray->indexG,sarray->csaGptrs,sarray->csaGcomp);
+      } else if (i >= sarray->indexC) {
+        debug3(printf("C"));
+        i = Bitpack64_read_one(i-sarray->indexC,sarray->csaCptrs,sarray->csaCcomp);
+      } else {
+        debug3(printf("A"));
+        i = Bitpack64_read_one(i-sarray->indexA,sarray->csaAptrs,sarray->csaAcomp);
+      }
+#endif
+
+      debug3b(assert(i == expected_i));
+      nhops += 1;
+    }
+
+    debug3(printf("\n"));
+    debug3(printf("Returning %u = %u - nhops %u\n",
+                  sarray->array_samples[i/sarray->sa_sampling] - nhops,
+                  sarray->array_samples[i/sarray->sa_sampling],nhops));
+
+    debug3a(assert(sarray->array_samples[i/sarray->sa_sampling] - nhops == expected_sa_i));
+
+    return sarray->array_samples[i/sarray->sa_sampling] - nhops;
+  }
+}
+
+#else
+
+#define csa_lookup(sarray,i) sarray->array[i]
+
+#endif
+
+
/* Ignores snps_root */
T
-Sarray_new (char *dir, char *fileroot, char *snps_root, Access_mode_T sarray_access, Access_mode_T aux_access,
- Mode_T mode, bool fwdp) {
+Sarray_new (char *dir, char *fileroot, char *snps_root, Access_mode_T sarray_access, Access_mode_T lcp_access,
+ Access_mode_T guideexc_access, Access_mode_T indexij_access, bool sharedp, Mode_T mode, bool fwdp) {
T new;
char *comma1;
double seconds;
int npages;
- char *sarrayfile;
+ bool old_format_p;
+ char *sarrayfile; /* Old format */
+
+#ifdef USE_CSA
+ char *csafile;
+ int shmid;
+ int fd, fd0;
+ size_t len, len0;
+
+ /* New format */
+ char *sasamplesfile;
+ char *csaA_ptrsfile, *csaA_compfile, *csaC_ptrsfile, *csaC_compfile,
+ *csaG_ptrsfile, *csaG_compfile, *csaT_ptrsfile, *csaT_compfile, *csaX_ptrsfile, *csaX_compfile;
+ char *filename;
+ FILE *fp;
+#endif
+
char *lcpchilddcfile;
char *lcp_guidefile, *lcp_exceptionsfile;
char *child_guidefile, *child_exceptionsfile;
@@ -477,9 +759,43 @@ Sarray_new (char *dir, char *fileroot, char *snps_root, Access_mode_T sarray_acc
}
}
+ /* Old format */
sarrayfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sarray")+1,sizeof(char));
sprintf(sarrayfile,"%s/%s%ssarray",dir,fileroot,mode_prefix);
+#ifdef USE_CSA
+#ifdef DEBUG3A
+ csafile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csa")+1,sizeof(char));
+ sprintf(csafile,"%s/%s%scsa",dir,fileroot,mode_prefix);
+#endif
+
+ /* New format */
+ sasamplesfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sasamples")+1,sizeof(char));
+ sprintf(sasamplesfile,"%s/%s%ssasamples",dir,fileroot,mode_prefix);
+
+ csaA_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaAmeta")+1,sizeof(char));
+ sprintf(csaA_ptrsfile,"%s/%s%scsaAmeta",dir,fileroot,mode_prefix);
+ csaA_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaAstrm")+1,sizeof(char));
+ sprintf(csaA_compfile,"%s/%s%scsaAstrm",dir,fileroot,mode_prefix);
+ csaC_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaCmeta")+1,sizeof(char));
+ sprintf(csaC_ptrsfile,"%s/%s%scsaCmeta",dir,fileroot,mode_prefix);
+ csaC_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaCstrm")+1,sizeof(char));
+ sprintf(csaC_compfile,"%s/%s%scsaCstrm",dir,fileroot,mode_prefix);
+ csaG_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaGmeta")+1,sizeof(char));
+ sprintf(csaG_ptrsfile,"%s/%s%scsaGmeta",dir,fileroot,mode_prefix);
+ csaG_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaGstrm")+1,sizeof(char));
+ sprintf(csaG_compfile,"%s/%s%scsaGstrm",dir,fileroot,mode_prefix);
+ csaT_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaTmeta")+1,sizeof(char));
+ sprintf(csaT_ptrsfile,"%s/%s%scsaTmeta",dir,fileroot,mode_prefix);
+ csaT_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaTstrm")+1,sizeof(char));
+ sprintf(csaT_compfile,"%s/%s%scsaTstrm",dir,fileroot,mode_prefix);
+ csaX_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaXmeta")+1,sizeof(char));
+ sprintf(csaX_ptrsfile,"%s/%s%scsaXmeta",dir,fileroot,mode_prefix);
+ csaX_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("csaXstrm")+1,sizeof(char));
+ sprintf(csaX_compfile,"%s/%s%scsaXstrm",dir,fileroot,mode_prefix);
+#endif
+
+
lcpchilddcfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpchilddc")+1,sizeof(char));
sprintf(lcpchilddcfile,"%s/%s%ssalcpchilddc",dir,fileroot,mode_prefix);
@@ -522,8 +838,12 @@ Sarray_new (char *dir, char *fileroot, char *snps_root, Access_mode_T sarray_acc
sprintf(indexij_compfile,"%s/%s%ssaindex64strm",dir,fileroot,mode_prefix);
#endif
- if (Access_file_exists_p(sarrayfile) == false) {
- fprintf(stderr,"Suffix array index file %s does not exist\n",sarrayfile);
+ if (Access_file_exists_p(sarrayfile) == false
+#ifdef USE_CSA
+ && Access_file_exists_p(csaA_ptrsfile) == false
+#endif
+ ) {
+ fprintf(stderr,"No suffix array for genome\n");
new = (T) NULL;
} else if (Access_file_exists_p(lcpchilddcfile) == false) {
@@ -533,74 +853,338 @@ Sarray_new (char *dir, char *fileroot, char *snps_root, Access_mode_T sarray_acc
exit(9);
} else {
- new = (T) MALLOC(sizeof(*new));
+ new = (T) MALLOC_KEEP(sizeof(*new));
+
+#ifdef USE_CSA
+ if (
+#ifdef DEBUG3A
+ 0 &&
+#endif
+ Access_file_exists_p(sarrayfile) == true) {
+ fprintf(stderr,"The genome was built using a non-compressed suffix array: %s\n",sarrayfile);
+ old_format_p = true;
+
+ } else {
+ old_format_p = false;
+
+#ifdef DEBUG3B
+ new->csa = (UINT4 *) Access_mmap_and_preload(&fd0,&len0,&npages,&seconds,csafile,sizeof(UINT4));
+#endif
+
+ filename = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("saindex0")+1,sizeof(char));
+ sprintf(filename,"%s/%s%ssaindex0",dir,fileroot,mode_prefix);
+ fp = fopen(filename,"rb");
+ FREAD_UINT(&new->indexA,fp);
+ FREAD_UINT(&new->indexC,fp);
+ FREAD_UINT(&new->indexG,fp);
+ FREAD_UINT(&new->indexT,fp);
+ FREAD_UINT(&new->indexX,fp);
+
+ /* For compressed suffix arrays, cannot rely upon array_len */
+ FREAD_UINT(&new->n_plus_one,fp); /* Should be genomiclength + 1 */
+ new->n = new->n_plus_one - 1;
+
+ /* Needed for SSE2 version of csa_lookup */
+ new->index0[0] = new->indexA;
+ new->index0[1] = new->indexC;
+ new->index0[3] = new->indexG;
+ new->index0[7] = new->indexT;
+ new->index0[15] = new->indexX;
+
+ fclose(fp);
+ FREE(filename);
+
+#ifdef HAVE_SSE2
+ new->indices0 = _mm_sub_epi32(_mm_set_epi32(new->indexX,new->indexT,new->indexG,new->indexC),
+ _mm_set1_epi32(2147483648) /* 2^31, same as epi_convert */);
+ /* because (a >= indices) is equivalent to (a > indices - 1) */
+ new->indices0 = _mm_sub_epi32(new->indices0,_mm_set1_epi32(1));
+#endif
+
+
+ filename = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sasampleq")+1,sizeof(char));
+ sprintf(filename,"%s/%s%ssasampleq",dir,fileroot,mode_prefix);
+ fp = fopen(filename,"rb");
+ FREAD_UINT(&new->sa_sampling,fp);
+ fclose(fp);
+ FREE(filename);
+ }
+
+ new->array_samples = (UINT4 *) NULL;
+#else
+ old_format_p = true;
+#endif
if (sarray_access == USE_MMAP_PRELOAD) {
- fprintf(stderr,"Pre-loading suffix array...");
- new->array = (UINT4 *) Access_mmap_and_preload(&new->array_fd,&new->array_len,&npages,&seconds,sarrayfile,
- sizeof(UINT4));
+ if (old_format_p == true) {
+ fprintf(stderr,"Pre-loading suffix array...");
+ new->array = (UINT4 *) Access_mmap_and_preload(&new->array_fd,&new->array_len,&npages,&seconds,sarrayfile,
+ sizeof(UINT4));
+ new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
+ new->n = new->n_plus_one - 1;
+
+ comma1 = Genomicpos_commafmt(new->array_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+#ifdef USE_CSA
+ } else {
+#ifdef DEBUG3A
+ new->array = (UINT4 *) Access_mmap_and_preload(&new->array_fd,&new->array_len,&npages,&seconds,sarrayfile,
+ sizeof(UINT4));
+ new->array_samples = (UINT4 *) Access_mmap_and_preload(&fd,&len,&npages,&seconds,sasamplesfile,
+ sizeof(UINT4));
+#else
+ new->array = (UINT4 *) NULL;
+ new->array_samples = (UINT4 *) Access_mmap_and_preload(&new->array_fd,&new->array_len,&npages,&seconds,sasamplesfile,
+ sizeof(UINT4));
+#endif
+ new->csaAptrs = (UINT4 *) Access_mmap_and_preload(&new->csaAptrs_fd,&new->csaAptrs_len,&npages,&seconds,csaA_ptrsfile,
+ sizeof(UINT4));
+ new->csaAcomp = (UINT4 *) Access_mmap_and_preload(&new->csaAcomp_fd,&new->csaAcomp_len,&npages,&seconds,csaA_compfile,
+ sizeof(UINT4));
+ new->csaCptrs = (UINT4 *) Access_mmap_and_preload(&new->csaCptrs_fd,&new->csaCptrs_len,&npages,&seconds,csaC_ptrsfile,
+ sizeof(UINT4));
+ new->csaCcomp = (UINT4 *) Access_mmap_and_preload(&new->csaCcomp_fd,&new->csaCcomp_len,&npages,&seconds,csaC_compfile,
+ sizeof(UINT4));
+ new->csaGptrs = (UINT4 *) Access_mmap_and_preload(&new->csaGptrs_fd,&new->csaGptrs_len,&npages,&seconds,csaG_ptrsfile,
+ sizeof(UINT4));
+ new->csaGcomp = (UINT4 *) Access_mmap_and_preload(&new->csaGcomp_fd,&new->csaGcomp_len,&npages,&seconds,csaG_compfile,
+ sizeof(UINT4));
+ new->csaTptrs = (UINT4 *) Access_mmap_and_preload(&new->csaTptrs_fd,&new->csaTptrs_len,&npages,&seconds,csaT_ptrsfile,
+ sizeof(UINT4));
+ new->csaTcomp = (UINT4 *) Access_mmap_and_preload(&new->csaTcomp_fd,&new->csaTcomp_len,&npages,&seconds,csaT_compfile,
+ sizeof(UINT4));
+ new->csaXptrs = (UINT4 *) Access_mmap_and_preload(&new->csaXptrs_fd,&new->csaXptrs_len,&npages,&seconds,csaX_ptrsfile,
+ sizeof(UINT4));
+ new->csaXcomp = (UINT4 *) Access_mmap_and_preload(&new->csaXcomp_fd,&new->csaXcomp_len,&npages,&seconds,csaX_compfile,
+ sizeof(UINT4));
+#endif
+ }
new->sarray_access = MMAPPED;
- comma1 = Genomicpos_commafmt(new->array_len);
- fprintf(stderr,"done (%s bytes)\n",comma1);
- FREE(comma1);
+
} else if (sarray_access == USE_MMAP_ONLY) {
- new->array = (UINT4 *) Access_mmap(&new->array_fd,&new->array_len,sarrayfile,sizeof(UINT4),/*randomp*/true);
+ if (old_format_p == true) {
+ new->array = (UINT4 *) Access_mmap(&new->array_fd,&new->array_len,sarrayfile,sizeof(UINT4),/*randomp*/true);
+ new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
+ new->n = new->n_plus_one - 1;
+#ifdef USE_CSA
+ } else {
+#ifdef DEBUG3A
+ new->array = (UINT4 *) Access_mmap(&new->array_fd,&new->array_len,sarrayfile,sizeof(UINT4),/*randomp*/true);
+ new->array_samples = (UINT4 *) Access_mmap(&fd,&len,sasamplesfile,sizeof(UINT4),/*randomp*/true);
+#else
+ new->array = (UINT4 *) NULL;
+ new->array_samples = (UINT4 *) Access_mmap(&new->array_fd,&new->array_len,sasamplesfile,sizeof(UINT4),/*randomp*/true);
+#endif
+ new->csaAptrs = (UINT4 *) Access_mmap(&new->csaAptrs_fd,&new->csaAptrs_len,csaA_ptrsfile,sizeof(UINT4),/*randomp*/true);
+ new->csaAcomp = (UINT4 *) Access_mmap(&new->csaAcomp_fd,&new->csaAcomp_len,csaA_compfile,sizeof(UINT4),/*randomp*/true);
+ new->csaCptrs = (UINT4 *) Access_mmap(&new->csaCptrs_fd,&new->csaCptrs_len,csaC_ptrsfile,sizeof(UINT4),/*randomp*/true);
+ new->csaCcomp = (UINT4 *) Access_mmap(&new->csaCcomp_fd,&new->csaCcomp_len,csaC_compfile,sizeof(UINT4),/*randomp*/true);
+ new->csaGptrs = (UINT4 *) Access_mmap(&new->csaGptrs_fd,&new->csaGptrs_len,csaG_ptrsfile,sizeof(UINT4),/*randomp*/true);
+ new->csaGcomp = (UINT4 *) Access_mmap(&new->csaGcomp_fd,&new->csaGcomp_len,csaG_compfile,sizeof(UINT4),/*randomp*/true);
+ new->csaTptrs = (UINT4 *) Access_mmap(&new->csaTptrs_fd,&new->csaTptrs_len,csaT_ptrsfile,sizeof(UINT4),/*randomp*/true);
+ new->csaTcomp = (UINT4 *) Access_mmap(&new->csaTcomp_fd,&new->csaTcomp_len,csaT_compfile,sizeof(UINT4),/*randomp*/true);
+ new->csaXptrs = (UINT4 *) Access_mmap(&new->csaXptrs_fd,&new->csaXptrs_len,csaX_ptrsfile,sizeof(UINT4),/*randomp*/true);
+ new->csaXcomp = (UINT4 *) Access_mmap(&new->csaXcomp_fd,&new->csaXcomp_len,csaX_compfile,sizeof(UINT4),/*randomp*/true);
+#endif
+ }
new->sarray_access = MMAPPED;
+
} else if (sarray_access == USE_ALLOCATE) {
- new->array = (UINT4 *) Access_allocated(&new->array_len,&seconds,sarrayfile,sizeof(UINT4));
- new->sarray_access = ALLOCATED;
+ if (old_format_p == true) {
+ fprintf(stderr,"Allocating memory for suffix array...");
+ new->array = (UINT4 *) Access_allocate(&new->array_shmid,&new->array_len,&seconds,sarrayfile,sizeof(UINT4),sharedp);
+ new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
+ new->n = new->n_plus_one - 1;
+ comma1 = Genomicpos_commafmt(new->array_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+#ifdef USE_CSA
+ } else {
+#ifdef DEBUG3A
+ new->array = (UINT4 *) Access_allocate(&new->array_shmid,&new->array_len,&seconds,sarrayfile,sizeof(UINT4),sharedp);
+#else
+ new->array = (UINT4 *) NULL;
+#endif
+ new->array_samples = (UINT4 *) Access_allocate(&shmid,&len,&seconds,sasamplesfile,sizeof(UINT4),sharedp);
+ new->csaAptrs = (UINT4 *) Access_allocate(&new->csaAptrs_shmid,&new->csaAptrs_len,&seconds,csaA_ptrsfile,sizeof(UINT4),sharedp);
+ new->csaAcomp = (UINT4 *) Access_allocate(&new->csaAcomp_shmid,&new->csaAcomp_len,&seconds,csaA_compfile,sizeof(UINT4),sharedp);
+ new->csaCptrs = (UINT4 *) Access_allocate(&new->csaCptrs_shmid,&new->csaCptrs_len,&seconds,csaC_ptrsfile,sizeof(UINT4),sharedp);
+ new->csaCcomp = (UINT4 *) Access_allocate(&new->csaCcomp_shmid,&new->csaCcomp_len,&seconds,csaC_compfile,sizeof(UINT4),sharedp);
+ new->csaGptrs = (UINT4 *) Access_allocate(&new->csaGptrs_shmid,&new->csaGptrs_len,&seconds,csaG_ptrsfile,sizeof(UINT4),sharedp);
+ new->csaGcomp = (UINT4 *) Access_allocate(&new->csaGcomp_shmid,&new->csaGcomp_len,&seconds,csaG_compfile,sizeof(UINT4),sharedp);
+ new->csaTptrs = (UINT4 *) Access_allocate(&new->csaTptrs_shmid,&new->csaTptrs_len,&seconds,csaT_ptrsfile,sizeof(UINT4),sharedp);
+ new->csaTcomp = (UINT4 *) Access_allocate(&new->csaTcomp_shmid,&new->csaTcomp_len,&seconds,csaT_compfile,sizeof(UINT4),sharedp);
+ new->csaXptrs = (UINT4 *) Access_allocate(&new->csaXptrs_shmid,&new->csaXptrs_len,&seconds,csaX_ptrsfile,sizeof(UINT4),sharedp);
+ new->csaXcomp = (UINT4 *) Access_allocate(&new->csaXcomp_shmid,&new->csaXcomp_len,&seconds,csaX_compfile,sizeof(UINT4),sharedp);
+#endif
+ }
+
+ if (sharedp == true) {
+ new->sarray_access = ALLOCATED_SHARED;
+ } else {
+ new->sarray_access = ALLOCATED_PRIVATE;
+ }
}
- new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
- new->n = new->n_plus_one - 1;
+#ifdef USE_CSA
+ new->csa0ptrs[0] = new->csaAptrs; new->csa0comp[0] = new->csaAcomp;
+ new->csa0ptrs[1] = new->csaCptrs; new->csa0comp[1] = new->csaCcomp;
+ new->csa0ptrs[3] = new->csaGptrs; new->csa0comp[3] = new->csaGcomp;
+ new->csa0ptrs[7] = new->csaTptrs; new->csa0comp[7] = new->csaTcomp;
+ new->csa0ptrs[15] = new->csaXptrs; new->csa0comp[15] = new->csaXcomp;
+#endif
+
#ifdef DEBUG15
/* 8 is for two DIFFERENTIAL_METAINFO_SIZE words */
- new->indexi_ptrs = (UINT4 *) Access_allocated(&new->indexi_ptrs_len,&seconds,indexi_ptrsfile,sizeof(UINT4));
- new->indexi_comp = (UINT4 *) Access_allocated(&new->indexi_comp_len,&seconds,indexi_compfile,sizeof(UINT4));
- new->indexj_ptrs = (UINT4 *) Access_allocated(&new->indexj_ptrs_len,&seconds,indexj_ptrsfile,sizeof(UINT4));
- new->indexj_comp = (UINT4 *) Access_allocated(&new->indexj_comp_len,&seconds,indexj_compfile,sizeof(UINT4));
- new->indexij_ptrs = (UINT4 *) Access_allocated(&new->indexij_ptrs_len,&seconds,indexij_ptrsfile,sizeof(UINT4));
- new->indexij_comp = (UINT4 *) Access_allocated(&new->indexij_comp_len,&seconds,indexij_compfile,sizeof(UINT4));
+ new->indexi_ptrs = (UINT4 *) Access_allocate(&key,&new->indexi_ptrs_len,&seconds,indexi_ptrsfile,sizeof(UINT4),/*sharedp*/false);
+ new->indexi_comp = (UINT4 *) Access_allocate(&key,&new->indexi_comp_len,&seconds,indexi_compfile,sizeof(UINT4),/*sharedp*/false);
+ new->indexj_ptrs = (UINT4 *) Access_allocate(&key,&new->indexj_ptrs_len,&seconds,indexj_ptrsfile,sizeof(UINT4),/*sharedp*/false);
+ new->indexj_comp = (UINT4 *) Access_allocate(&key,&new->indexj_comp_len,&seconds,indexj_compfile,sizeof(UINT4),/*sharedp*/false);
+ new->indexij_ptrs = (UINT4 *) Access_allocate(&key,&new->indexij_ptrs_len,&seconds,indexij_ptrsfile,sizeof(UINT4),/*sharedp*/false);
+ new->indexij_comp = (UINT4 *) Access_allocate(&key,&new->indexij_comp_len,&seconds,indexij_compfile,sizeof(UINT4),/*sharedp*/false);
new->indexsize = 3 + log4(((new->indexij_ptrs_len - 8)/sizeof(UINT4)/2)/ /*DIFFERENTIAL_METAINFO_SIZE*/2);
#elif defined(USE_SEPARATE_BUCKETS)
/* 8 is for two DIFFERENTIAL_METAINFO_SIZE words */
- new->indexi_ptrs = (UINT4 *) Access_allocated(&new->indexi_ptrs_len,&seconds,indexi_ptrsfile,sizeof(UINT4));
- new->indexi_comp = (UINT4 *) Access_allocated(&new->indexi_comp_len,&seconds,indexi_compfile,sizeof(UINT4));
- new->indexj_ptrs = (UINT4 *) Access_allocated(&new->indexj_ptrs_len,&seconds,indexj_ptrsfile,sizeof(UINT4));
- new->indexj_comp = (UINT4 *) Access_allocated(&new->indexj_comp_len,&seconds,indexj_compfile,sizeof(UINT4));
+ new->indexi_ptrs = (UINT4 *) Access_allocate(&key,&new->indexi_ptrs_len,&seconds,indexi_ptrsfile,sizeof(UINT4),/*sharedp*/false);
+ new->indexi_comp = (UINT4 *) Access_allocate(&key,&new->indexi_comp_len,&seconds,indexi_compfile,sizeof(UINT4),/*sharedp*/false);
+ new->indexj_ptrs = (UINT4 *) Access_allocate(&key,&new->indexj_ptrs_len,&seconds,indexj_ptrsfile,sizeof(UINT4),/*sharedp*/false);
+ new->indexj_comp = (UINT4 *) Access_allocate(&key,&new->indexj_comp_len,&seconds,indexj_compfile,sizeof(UINT4),/*sharedp*/false);
new->indexsize = 3 + log4(((new->indexi_ptrs_len - 8)/sizeof(UINT4))/ /*DIFFERENTIAL_METAINFO_SIZE*/2);
#else
/* 8 is for two DIFFERENTIAL_METAINFO_SIZE words */
- new->indexij_ptrs = (UINT4 *) Access_allocated(&new->indexij_ptrs_len,&seconds,indexij_ptrsfile,sizeof(UINT4));
- new->indexij_comp = (UINT4 *) Access_allocated(&new->indexij_comp_len,&seconds,indexij_compfile,sizeof(UINT4));
+ if (indexij_access == USE_MMAP_PRELOAD) {
+ fprintf(stderr,"Pre-loading indexij ptrs...");
+ new->indexij_ptrs = (UINT4 *) Access_mmap_and_preload(&new->indexij_ptrs_fd,&new->indexij_ptrs_len,&npages,&seconds,indexij_ptrsfile,
+ sizeof(UINT4));
+ comma1 = Genomicpos_commafmt(new->indexij_ptrs_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+ fprintf(stderr,"Pre-loading indexij comp...");
+ new->indexij_comp = (UINT4 *) Access_mmap_and_preload(&new->indexij_comp_fd,&new->indexij_comp_len,&npages,&seconds,indexij_compfile,
+ sizeof(UINT4));
+ comma1 = Genomicpos_commafmt(new->indexij_comp_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+ new->indexij_access = MMAPPED;
+
+ } else if (indexij_access == USE_MMAP_ONLY) {
+ new->indexij_ptrs = (UINT4 *) Access_mmap(&new->indexij_ptrs_fd,&new->indexij_ptrs_len,indexij_ptrsfile,sizeof(UINT4),/*randomp*/true);
+ new->indexij_comp = (UINT4 *) Access_mmap(&new->indexij_comp_fd,&new->indexij_comp_len,indexij_compfile,sizeof(UINT4),/*randomp*/true);
+
+ new->indexij_access = MMAPPED;
+
+ } else if (indexij_access == USE_ALLOCATE) {
+ fprintf(stderr,"Allocating memory for indexij ptrs...");
+ new->indexij_ptrs = (UINT4 *) Access_allocate(&new->indexij_ptrs_shmid,&new->indexij_ptrs_len,&seconds,indexij_ptrsfile,sizeof(UINT4),sharedp);
+ comma1 = Genomicpos_commafmt(new->indexij_ptrs_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+ fprintf(stderr,"Allocating memory for indexij comp...");
+ new->indexij_comp = (UINT4 *) Access_allocate(&new->indexij_comp_shmid,&new->indexij_comp_len,&seconds,indexij_compfile,sizeof(UINT4),sharedp);
+ comma1 = Genomicpos_commafmt(new->indexij_comp_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+ if (sharedp == true) {
+ new->indexij_access = ALLOCATED_SHARED;
+ } else {
+ new->indexij_access = ALLOCATED_PRIVATE;
+ }
+ }
+
new->indexsize = 3 + log4(((new->indexij_ptrs_len - 8)/sizeof(UINT4)/2)/ /*DIFFERENTIAL_METAINFO_SIZE*/2);
#endif
new->indexspace = power(4,new->indexsize);
- if (aux_access == USE_MMAP_PRELOAD) {
+ if (lcp_access == USE_MMAP_PRELOAD) {
fprintf(stderr,"Pre-loading LCP/child/DC arrays...");
new->lcpchilddc = (unsigned char *) Access_mmap_and_preload(&new->lcpchilddc_fd,&new->lcpchilddc_len,&npages,&seconds,
lcpchilddcfile,sizeof(unsigned char));
- new->aux_access = MMAPPED;
+ new->lcp_access = MMAPPED;
comma1 = Genomicpos_commafmt(new->lcpchilddc_len);
- fprintf(stderr,"done (%s bytes)\n",comma1);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
FREE(comma1);
- } else if (aux_access == USE_MMAP_ONLY) {
+ } else if (lcp_access == USE_MMAP_ONLY) {
new->lcpchilddc = (unsigned char *) Access_mmap(&new->lcpchilddc_fd,&new->lcpchilddc_len,lcpchilddcfile,
sizeof(unsigned char),/*randomp*/true);
- new->aux_access = MMAPPED;
- } else if (aux_access == USE_ALLOCATE) {
- new->lcpchilddc = (unsigned char *) Access_allocated(&new->lcpchilddc_len,&seconds,lcpchilddcfile,sizeof(unsigned char));
- new->aux_access = ALLOCATED;
+ new->lcp_access = MMAPPED;
+ } else if (lcp_access == USE_ALLOCATE) {
+ fprintf(stderr,"Allocating memory for lcpchildc...");
+ new->lcpchilddc = (unsigned char *) Access_allocate(&new->lcpchilddc_shmid,&new->lcpchilddc_len,&seconds,lcpchilddcfile,sizeof(unsigned char),sharedp);
+ comma1 = Genomicpos_commafmt(new->lcpchilddc_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+ if (sharedp == true) {
+ new->lcp_access = ALLOCATED_SHARED;
+ } else {
+ new->lcp_access = ALLOCATED_PRIVATE;
+ }
}
- new->lcp_guide = (UINT4 *) Access_allocated(&new->lcp_guide_len,&seconds,lcp_guidefile,sizeof(UINT4));
- new->lcp_exceptions = (UINT4 *) Access_allocated(&new->lcp_exceptions_len,&seconds,lcp_exceptionsfile,sizeof(UINT4));
- new->n_lcp_exceptions = new->lcp_exceptions_len/(sizeof(UINT4) + sizeof(UINT4));
+ if (guideexc_access == USE_MMAP_PRELOAD) {
+ fprintf(stderr,"Pre-loading guide/exceptions...");
+ new->lcp_guide = (UINT4 *) Access_mmap_and_preload(&new->lcp_guide_fd,&new->lcp_guide_len,&npages,&seconds,
+ lcp_guidefile,sizeof(UINT4));
+ new->lcp_exceptions = (UINT4 *) Access_mmap_and_preload(&new->lcp_exceptions_fd,&new->lcp_exceptions_len,&npages,&seconds,
+ lcp_exceptionsfile,sizeof(UINT4));
+ new->child_guide = (UINT4 *) Access_mmap_and_preload(&new->child_guide_fd,&new->child_guide_len,&npages,&seconds,
+ child_guidefile,sizeof(UINT4));
+ new->child_exceptions = (UINT4 *) Access_mmap_and_preload(&new->child_exceptions_fd,&new->child_exceptions_len,&npages,&seconds,
+ child_exceptionsfile,sizeof(UINT4));
+ new->guideexc_access = MMAPPED;
+ fprintf(stderr,"done\n");
+
+ } else if (guideexc_access == USE_MMAP_ONLY) {
+ new->lcp_guide = (UINT4 *) Access_mmap(&new->lcp_guide_fd,&new->lcp_guide_len,
+ lcp_guidefile,sizeof(UINT4),/*randomp*/true);
+ new->lcp_exceptions = (UINT4 *) Access_mmap(&new->lcp_exceptions_fd,&new->lcp_exceptions_len,
+ lcp_exceptionsfile,sizeof(UINT4),/*randomp*/true);
+ new->child_guide = (UINT4 *) Access_mmap(&new->child_guide_fd,&new->child_guide_len,
+ child_guidefile,sizeof(UINT4),/*randomp*/true);
+ new->child_exceptions = (UINT4 *) Access_mmap(&new->child_exceptions_fd,&new->child_exceptions_len,
+ child_exceptionsfile,sizeof(UINT4),/*randomp*/true);
+ new->guideexc_access = MMAPPED;
+
+ } else if (guideexc_access == USE_ALLOCATE) {
+ fprintf(stderr,"Allocating memory for lcp guide...");
+ new->lcp_guide = (UINT4 *) Access_allocate(&new->lcp_guide_shmid,&new->lcp_guide_len,&seconds,lcp_guidefile,sizeof(UINT4),sharedp);
+ comma1 = Genomicpos_commafmt(new->lcp_guide_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+ fprintf(stderr,"Allocating memory for lcp exceptions...");
+ new->lcp_exceptions = (UINT4 *) Access_allocate(&new->lcp_exceptions_shmid,&new->lcp_exceptions_len,&seconds,lcp_exceptionsfile,sizeof(UINT4),sharedp);
+ comma1 = Genomicpos_commafmt(new->lcp_exceptions_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+ fprintf(stderr,"Allocating memory for child guide...");
+ new->child_guide = (UINT4 *) Access_allocate(&new->child_guide_shmid,&new->child_guide_len,&seconds,child_guidefile,sizeof(UINT4),sharedp);
+ comma1 = Genomicpos_commafmt(new->child_guide_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+ fprintf(stderr,"Allocating memory for child exceptions...");
+ new->child_exceptions = (UINT4 *) Access_allocate(&new->child_exceptions_shmid,&new->child_exceptions_len,&seconds,child_exceptionsfile,sizeof(UINT4),sharedp);
+ comma1 = Genomicpos_commafmt(new->child_exceptions_len);
+ fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
+ FREE(comma1);
+
+ if (sharedp == true) {
+ new->guideexc_access = ALLOCATED_SHARED;
+ } else {
+ new->guideexc_access = ALLOCATED_PRIVATE;
+ }
+ }
- new->child_guide = (UINT4 *) Access_allocated(&new->child_guide_len,&seconds,child_guidefile,sizeof(UINT4));
- new->child_exceptions = (UINT4 *) Access_allocated(&new->child_exceptions_len,&seconds,child_exceptionsfile,sizeof(UINT4));
+ new->n_lcp_exceptions = new->lcp_exceptions_len/(sizeof(UINT4) + sizeof(UINT4));
new->child_guide_interval = 1024;
}
@@ -630,6 +1214,15 @@ Sarray_new (char *dir, char *fileroot, char *snps_root, Access_mode_T sarray_acc
FREE(indexij_ptrsfile);
#endif
+#ifdef USE_CSA
+ FREE(csaX_ptrsfile); FREE(csaX_compfile);
+ FREE(csaT_ptrsfile); FREE(csaT_compfile);
+ FREE(csaG_ptrsfile); FREE(csaG_compfile);
+ FREE(csaC_ptrsfile); FREE(csaC_compfile);
+ FREE(csaA_ptrsfile); FREE(csaA_compfile);
+
+ FREE(sasamplesfile);
+#endif
FREE(sarrayfile);
return new;
@@ -652,29 +1245,130 @@ Sarray_free (T *old) {
FREE((*old)->indexj_ptrs);
FREE((*old)->indexj_comp);
#else
- FREE((*old)->indexij_ptrs);
- FREE((*old)->indexij_comp);
+ if ((*old)->indexij_access == MMAPPED) {
+ munmap((void *) (*old)->indexij_ptrs,(*old)->indexij_ptrs_len);
+ close((*old)->indexij_ptrs_fd);
+ munmap((void *) (*old)->indexij_comp,(*old)->indexij_comp_len);
+ close((*old)->indexij_comp_fd);
+ } else if ((*old)->indexij_access == ALLOCATED_PRIVATE) {
+ FREE((*old)->indexij_ptrs);
+ FREE((*old)->indexij_comp);
+ } else if ((*old)->indexij_access == ALLOCATED_SHARED) {
+ Access_deallocate((*old)->indexij_ptrs,(*old)->indexij_ptrs_shmid);
+ Access_deallocate((*old)->indexij_comp,(*old)->indexij_comp_shmid);
+ }
#endif
- FREE((*old)->lcp_exceptions);
- FREE((*old)->lcp_guide);
- FREE((*old)->child_exceptions);
- FREE((*old)->child_guide);
- if ((*old)->aux_access == MMAPPED) {
+ if ((*old)->guideexc_access == MMAPPED) {
+ munmap((void *) (*old)->lcp_guide,(*old)->lcp_guide_len);
+ close((*old)->lcp_guide_fd);
+ munmap((void *) (*old)->lcp_exceptions,(*old)->lcp_exceptions_len);
+ close((*old)->lcp_exceptions_fd);
+ munmap((void *) (*old)->child_guide,(*old)->child_guide_len);
+ close((*old)->child_guide_fd);
+ munmap((void *) (*old)->child_exceptions,(*old)->child_exceptions_len);
+ close((*old)->child_exceptions_fd);
+ } else if ((*old)->guideexc_access == ALLOCATED_PRIVATE) {
+ FREE((*old)->lcp_exceptions);
+ FREE((*old)->lcp_guide);
+ FREE((*old)->child_exceptions);
+ FREE((*old)->child_guide);
+ } else if ((*old)->guideexc_access == ALLOCATED_SHARED) {
+ Access_deallocate((*old)->lcp_exceptions,(*old)->lcp_exceptions_shmid);
+ Access_deallocate((*old)->lcp_guide,(*old)->lcp_guide_shmid);
+ Access_deallocate((*old)->child_exceptions,(*old)->child_exceptions_shmid);
+ Access_deallocate((*old)->child_guide,(*old)->child_guide_shmid);
+ }
+
+ if ((*old)->lcp_access == MMAPPED) {
munmap((void *) (*old)->lcpchilddc,(*old)->lcpchilddc_len);
close((*old)->lcpchilddc_fd);
- } else {
+ } else if ((*old)->lcp_access == ALLOCATED_PRIVATE) {
FREE((*old)->lcpchilddc);
+ } else if ((*old)->lcp_access == ALLOCATED_SHARED) {
+ Access_deallocate((*old)->lcpchilddc,(*old)->lcpchilddc_shmid);
}
+#ifndef USE_CSA
if ((*old)->sarray_access == MMAPPED) {
munmap((void *) (*old)->array,(*old)->array_len);
close((*old)->array_fd);
- } else {
+ } else if ((*old)->sarray_access == ALLOCATED_PRIVATE) {
FREE((*old)->array);
+ } else if ((*old)->sarray_access == ALLOCATED_SHARED) {
+ Access_deallocate((*old)->array,(*old)->array_shmid);
+ }
+#else
+ if ((*old)->array != NULL) {
+ if ((*old)->sarray_access == MMAPPED) {
+ munmap((void *) (*old)->array,(*old)->array_len);
+ close((*old)->array_fd);
+ } else if ((*old)->sarray_access == ALLOCATED_PRIVATE) {
+ FREE((*old)->array);
+ } else if ((*old)->sarray_access == ALLOCATED_SHARED) {
+ Access_deallocate((*old)->array,(*old)->array_shmid);
+ }
+ } else {
+ if ((*old)->sarray_access == MMAPPED) {
+ munmap((void *) (*old)->array_samples,(*old)->array_len);
+ close((*old)->array_fd);
+
+ munmap((void *) (*old)->csaAptrs,(*old)->csaAptrs_len);
+ close((*old)->csaAptrs_fd);
+ munmap((void *) (*old)->csaAcomp,(*old)->csaAcomp_len);
+ close((*old)->csaAcomp_fd);
+
+ munmap((void *) (*old)->csaCptrs,(*old)->csaCptrs_len);
+ close((*old)->csaCptrs_fd);
+ munmap((void *) (*old)->csaCcomp,(*old)->csaCcomp_len);
+ close((*old)->csaCcomp_fd);
+
+ munmap((void *) (*old)->csaGptrs,(*old)->csaGptrs_len);
+ close((*old)->csaGptrs_fd);
+ munmap((void *) (*old)->csaGcomp,(*old)->csaGcomp_len);
+ close((*old)->csaGcomp_fd);
+
+ munmap((void *) (*old)->csaTptrs,(*old)->csaTptrs_len);
+ close((*old)->csaTptrs_fd);
+ munmap((void *) (*old)->csaTcomp,(*old)->csaTcomp_len);
+ close((*old)->csaTcomp_fd);
+
+ munmap((void *) (*old)->csaXptrs,(*old)->csaXptrs_len);
+ close((*old)->csaXptrs_fd);
+ munmap((void *) (*old)->csaXcomp,(*old)->csaXcomp_len);
+ close((*old)->csaXcomp_fd);
+
+ } else if ((*old)->sarray_access == ALLOCATED_PRIVATE) {
+ FREE((*old)->array_samples);
+ FREE((*old)->csaAptrs);
+ FREE((*old)->csaAcomp);
+ FREE((*old)->csaCptrs);
+ FREE((*old)->csaCcomp);
+ FREE((*old)->csaGptrs);
+ FREE((*old)->csaGcomp);
+ FREE((*old)->csaTptrs);
+ FREE((*old)->csaTcomp);
+ FREE((*old)->csaXptrs);
+ FREE((*old)->csaXcomp);
+
+ } else if ((*old)->sarray_access == ALLOCATED_SHARED) {
+ Access_deallocate((*old)->array_samples,(*old)->array_shmid);
+ Access_deallocate((*old)->csaAptrs,(*old)->csaAptrs_shmid);
+ Access_deallocate((*old)->csaAcomp,(*old)->csaAcomp_shmid);
+ Access_deallocate((*old)->csaCptrs,(*old)->csaCptrs_shmid);
+ Access_deallocate((*old)->csaCcomp,(*old)->csaCcomp_shmid);
+ Access_deallocate((*old)->csaGptrs,(*old)->csaGptrs_shmid);
+ Access_deallocate((*old)->csaGcomp,(*old)->csaGcomp_shmid);
+ Access_deallocate((*old)->csaTptrs,(*old)->csaTptrs_shmid);
+ Access_deallocate((*old)->csaTcomp,(*old)->csaTcomp_shmid);
+ Access_deallocate((*old)->csaXptrs,(*old)->csaXptrs_shmid);
+ Access_deallocate((*old)->csaXcomp,(*old)->csaXcomp_shmid);
+ }
}
+#endif
- FREE(*old);
+
+ FREE_KEEP(*old);
}
return;
@@ -987,13 +1681,15 @@ find_longest_match (UINT4 nmatches, Sarrayptr_T *initptr, Sarrayptr_T *finalptr,
UINT4 lcp_whole, nextl, up;
UINT4 minlength;
UINT4 l, r;
+ Univcoord_T SA_i;
while (nmatches < querylength) {
if (i == j) {
/* Singleton interval */
debug1(printf("Singleton interval %u..%u\n",i,j));
+ SA_i = csa_lookup(sarray,i);
nmatches +=
- Genome_consecutive_matches_rightward(query_compress,/*left*/sarray->array[i]-queryoffset,
+ Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
/*pos5*/queryoffset+nmatches,/*pos3*/queryoffset+querylength,
plusp,genestrand,first_read_p);
*initptr = i;
@@ -1022,8 +1718,9 @@ find_longest_match (UINT4 nmatches, Sarrayptr_T *initptr, Sarrayptr_T *finalptr,
/* Check only up to minlength, so we validate the entire interval */
minlength = (lcp_whole < querylength) ? lcp_whole : querylength;
debug1(printf("Looking up genome for query from %d .. %d - 1\n",nmatches,minlength));
+ SA_i = csa_lookup(sarray,i);
nmatches +=
- Genome_consecutive_matches_rightward(query_compress,/*left*/sarray->array[i]-queryoffset,
+ Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
/*pos5*/queryoffset+nmatches,/*pos3*/queryoffset+minlength,
plusp,genestrand,first_read_p);
if (nmatches < minlength) {
@@ -1074,18 +1771,15 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
Storedoligomer_T oligo;
UINT4 l, r;
-
-#ifdef DEBUG
+#ifdef DEBUG1
+ Univcoord_T SA_i;
int k = 0;
UINT4 recount;
char Buffer[1000];
- Univcoord_T hit;
bool failp;
-#elif defined(DEBUG1)
- char Buffer[1000];
#endif
- debug(printf("sarray_search on %s, querylength %d, plusp %d\n",query,querylength,plusp));
+ debug1(printf("sarray_search on %.*s, querylength %d, plusp %d\n",querylength,query,querylength,plusp));
/* Find initial lcp-interval */
effective_querylength = nt_querylength(query,querylength);
@@ -1099,132 +1793,30 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
} else if (effective_querylength < sarray->indexsize) {
debug1(printf("string %.*s with effective querylength %d is shorter than indexsize",
querylength,query,effective_querylength));
-
-#if 1
l = 1;
r = sarray->n;
-#else
- /* Try to infer from 12-mer index, but can be tricky when N's are present */
- oligo = nt_oligo_truncate(query,effective_querylength,sarray->indexsize,/*subst_value for A*/0);
+ } else {
+ oligo = nt_oligo(query,sarray->indexsize);
#ifdef DEBUG15
- if ((l = Bitpack64_read_one(oligo*2,sarray->indexij_ptrs,sarray->indexij_comp)) !=
- Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp)) {
- abort();
- }
+ if ((l = Bitpack64_read_two(&r,oligo*2,sarray->indexij_ptrs,sarray->indexij_comp)) !=
+ Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp)) {
+ abort();
+ } else if (r - 1 != Bitpack64_read_one(oligo,sarray->indexj_ptrs,sarray->indexj_comp)) {
+ printf("For oligo %u, separate buckets give %u and %u, while single bucket gives %u and %u\n",
+ oligo,
+ Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp),
+ Bitpack64_read_one(oligo,sarray->indexj_ptrs,sarray->indexj_comp),
+ l,r);
+ abort();
+ }
+ r--; /* Because interleaved writes r+1 to maintain monotonicity */
#elif defined(USE_SEPARATE_BUCKETS)
- l = Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp);
+ l = Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp);
+ r = Bitpack64_read_one(oligo,sarray->indexj_ptrs,sarray->indexj_comp);
#else
- l = Bitpack64_read_one(oligo*2,sarray->indexij_ptrs,sarray->indexij_comp);
-#endif
- debug1(printf(" => oligo %08X",oligo));
- }
-
- /* Because $ < A, we need to check for this case. Need to back up just 1. */
- /* Test is SA[l-1] + indexsize - 1 >= n, or SA[l-1] + indexsize > n */
- debug1(printf("Comparing SA %u + indexsize %d with n %u\n",sarray->array[l-1],sarray->indexsize,sarray->n));
- if (l > 1 && sarray->array[l-1] + sarray->indexsize > sarray->n) {
- debug1(printf(" (backing up one position for l, because at end of genome)"));
- l--;
- }
-
- /* Add 1 to rollover to next oligo, to handle Ns in genome */
- oligo = nt_oligo_truncate(query,effective_querylength,sarray->indexsize,/*subst_value for T*/3) + 1;
- oligo_prev = oligo - 1;
-
- r = Bitpack64_read_one(oligo*2,sarray->indexij_ptrs,sarray->indexij_comp) - 1;
- r_prev = Bitpack64_offsetptr_only(oligo_prev*2+1,sarray->indexij_ptrs,sarray->indexij_comp);
-
- if (r != r_prev) {
- debug1(printf("r is %u - 1, but r_prev is %u, indicating the presence of N's => Starting from root\n",
- r+1,r_prev));
- l = 1;
- r = sarray->n;
-
- } else if (oligo == sarray->indexspace) {
- /* We have a poly-T, so we cannot determine r. For example,
- TTTTTN has a different r value than TTN. */
- debug1(printf(" but poly-T => 1-letter for T: %u..%u\n",l,r));
-#if 0
- l = sarray->initindexi[3];
- r = sarray->initindexj[3];
- /* Keep nmatches = 0, because there may not be a T in the genome */
-#else
- l = 1;
- r = sarray->n;
-#endif
-
- } else {
-#if 0
- /* Already computed above */
-#ifdef DEBUG15
- if ((r = Bitpack64_read_one(oligo*2,sarray->indexij_ptrs,sarray->indexij_comp) - 1) !=
- Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp) - 1) {
- abort();
- }
-#elif defined(USE_SEPARATE_BUCKETS)
- r = Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp) - 1;
-#else
- r = Bitpack64_read_one(oligo*2,sarray->indexij_ptrs,sarray->indexij_comp) - 1;
-#endif
-#endif
-
- /* Because $ < A, we need to check for this case. Need to back up just 1. */
- /* Test is SA[r] + indexsize - 1 >= n, or SA[r] + indexsize > n */
- debug1(printf(" (checking %u + %d >= %u)",sarray->array[r],effective_querylength,sarray->n));
- if (r > 0 && sarray->array[r] + sarray->indexsize > sarray->n) {
- debug1(printf(" (backing up one position for r, because at end of genome)"));
- r--;
- }
- debug1(printf(" and %08X => interval %u..%u (effective_querylength %d)",
- oligo,l,r,effective_querylength));
-
- if (l <= r) {
- /* Keep nmatches = 0, since we don't know the value yet */
- debug1(printf(" (good)\n"));
- } else {
-#if 0
- /* Did not find a match using saindex, so resort to one letter */
- switch (query[0]) {
- case 'A': l = sarray->initindexi[0]; r = sarray->initindexj[0]; break;
- case 'C': l = sarray->initindexi[1]; r = sarray->initindexj[1]; break;
- case 'G': l = sarray->initindexi[2]; r = sarray->initindexj[2]; break;
- case 'T': l = sarray->initindexi[3]; r = sarray->initindexj[3]; break;
- default: l = 1; r = 0;
- }
- debug1(printf(" (bad) => 1-letter from %c: %u..%u\n",query[0],l,r));
-#else
- /* The entire lcp-interval [1,sarray->n] should also work without initindex */
- l = 1;
- r = sarray->n;
- debug1(printf(" (bad) => entire lcp-interval: %u..%u\n",l,r));
-#endif
- }
- }
- /* End of code to infer from 12-mers */
-#endif
-
- } else {
- oligo = nt_oligo(query,sarray->indexsize);
-#ifdef DEBUG15
- if ((l = Bitpack64_read_two(&r,oligo*2,sarray->indexij_ptrs,sarray->indexij_comp)) !=
- Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp)) {
- abort();
- } else if (r - 1 != Bitpack64_read_one(oligo,sarray->indexj_ptrs,sarray->indexj_comp)) {
- printf("For oligo %u, separate buckets give %u and %u, while single bucket gives %u and %u\n",
- oligo,
- Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp),
- Bitpack64_read_one(oligo,sarray->indexj_ptrs,sarray->indexj_comp),
- l,r);
- abort();
- }
- r--; /* Because interleaved writes r+1 to maintain monotonicity */
-#elif defined(USE_SEPARATE_BUCKETS)
- l = Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp);
- r = Bitpack64_read_one(oligo,sarray->indexj_ptrs,sarray->indexj_comp);
-#else
- l = Bitpack64_read_two(&r,oligo*2,sarray->indexij_ptrs,sarray->indexij_comp);
- r--; /* Because interleaved writes r+1 to maintain monotonicity */
+ l = Bitpack64_read_two(&r,oligo*2,sarray->indexij_ptrs,sarray->indexij_comp);
+ r--; /* Because interleaved writes r+1 to maintain monotonicity */
#endif
debug1(printf("string %.*s is equal/longer than indexsize %d => oligo %u => interval %u..%u",
querylength,query,sarray->indexsize,oligo,l,r));
@@ -1234,22 +1826,10 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
/* i = l; */
/* j = r; */
} else {
-#if 0
- /* Did not find a match using saindex, so resort to one letter */
- switch (query[0]) {
- case 'A': l = sarray->initindexi[0]; r = sarray->initindexj[0]; break;
- case 'C': l = sarray->initindexi[1]; r = sarray->initindexj[1]; break;
- case 'G': l = sarray->initindexi[2]; r = sarray->initindexj[2]; break;
- case 'T': l = sarray->initindexi[3]; r = sarray->initindexj[3]; break;
- default: l = 1; r = 0;
- }
- debug1(printf(" (bad) => 1-letter from %c: %u..%u\n",query[0],l,r));
-#else
/* The entire lcp-interval [1,sarray->n] should also work without initindex */
l = 1;
r = sarray->n;
debug1(printf(" (bad) => entire lcp-interval: %u..%u\n",l,r));
-#endif
}
}
@@ -1264,45 +1844,46 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
}
/* Search through suffix tree */
- debug(printf("initptr gets %u, finalptr gets %u\n",*initptr,*finalptr));
+ debug1(printf("initptr gets %u, finalptr gets %u\n",*initptr,*finalptr));
if (*nmatches < querylength) {
*successp = false;
- debug(printf("%s fail at %d: got %d hits with %d matches:\n",
+ debug1(printf("%s fail at %d: got %d hits with %d matches:\n",
plusp ? "plus" : "minus",queryoffset,(*finalptr - *initptr + 1),*nmatches));
} else {
*successp = true;
- debug(printf("%s success at %d: got %d hits with %d matches:\n",
+ debug1(printf("%s success at %d: got %d hits with %d matches:\n",
plusp ? "plus" : "minus",queryoffset,(*finalptr - *initptr + 1),*nmatches));
}
-#ifdef DEBUG
+#ifdef DEBUG1
failp = false;
/* Before */
if (*nmatches > 0 && *initptr > 0U) {
- recount = Genome_consecutive_matches_rightward(query_compress,/*left*/sarray->array[(*initptr)-1]-queryoffset,
+ SA_i = csa_lookup(sarray,(*initptr)-1);
+ recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
/*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
plusp,genestrand,first_read_p);
- printf("%d\t%u\t%u\t",recount,(*initptr)-1,sarray->array[(*initptr)-1] /*+ 1U*/);
+ printf("%d\t%u\t%u\t",recount,(*initptr)-1,SA_i/*+ 1U*/);
if (genestrand == +2) {
if (plusp) {
- Genome_fill_buffer_convert_rev(sarray->array[(*initptr)-1],recount+1,Buffer);
+ Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
} else {
- Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)-1],recount+1,Buffer);
+ Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
}
} else {
if (plusp) {
- Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)-1],recount+1,Buffer);
+ Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
} else {
- Genome_fill_buffer_convert_rev(sarray->array[(*initptr)-1],recount+1,Buffer);
+ Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
}
}
printf("%s\n",Buffer);
if (recount >= *nmatches) {
printf("querylength is %d\n",querylength);
printf("false negative: recount %d at %u before init does equal expected nmatches %d\n",
- recount,sarray->array[(*initptr)-1],*nmatches);
+ recount,SA_i,*nmatches);
failp = true;
}
}
@@ -1310,7 +1891,38 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
/* Hits */
- for (k = 0; k < (int) (*finalptr - *initptr + 1) && k < 100; k++) {
+ for (k = 0; k < (int) (*finalptr - *initptr + 1) && k < MAX_DEBUG1_HITS; k++) {
+ SA_i = csa_lookup(sarray,(*initptr)+k);
+ recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
+ /*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
+ plusp,genestrand,first_read_p);
+ printf("%d\t%u\t%u\t",recount,(*initptr)+k,SA_i/*+ 1U*/);
+ if (genestrand == +2) {
+ if (plusp) {
+ Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
+ } else {
+ Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
+ }
+ } else {
+ if (plusp) {
+ Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
+ } else {
+ Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
+ }
+ }
+ printf("%s\n",Buffer);
+ if (recount != *nmatches) {
+ printf("querylength is %d\n",querylength);
+ printf("false positive: recount %d at %u does not equal expected nmatches %d\n",
+ recount,csa_lookup(sarray,(*initptr)),*nmatches);
+ failp = true;
+ }
+ }
+
+ if (k < (int) (*finalptr - *initptr + 1)) {
+ /* Overflow */
+ printf("...\n");
+ k = (int) (*finalptr - *initptr);
hit = sarray->array[(*initptr)+k];
recount = Genome_consecutive_matches_rightward(query_compress,/*left*/hit-queryoffset,
/*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
@@ -1341,30 +1953,30 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
/* After */
- if (*nmatches > 0 && sarray->array[(*finalptr)+1] > 0U) {
+ if (*nmatches > 0 && (SA_i = csa_lookup(sarray,(*finalptr)+1)) > 0U) {
printf("\n");
- recount = Genome_consecutive_matches_rightward(query_compress,/*left*/sarray->array[(*finalptr)+1]-queryoffset,
+ recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
/*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
plusp,genestrand,first_read_p);
- printf("%d\t%u\t%u\t",recount,(*finalptr)+1,sarray->array[(*finalptr)+1] /*+ 1U*/);
+ printf("%d\t%u\t%u\t",recount,(*finalptr)+1,SA_i/*+ 1U*/);
if (genestrand == +2) {
if (plusp) {
- Genome_fill_buffer_convert_rev(sarray->array[(*finalptr)+1],recount+1,Buffer);
+ Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
} else {
- Genome_fill_buffer_convert_fwd(sarray->array[(*finalptr)+1],recount+1,Buffer);
+ Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
}
} else {
if (plusp) {
- Genome_fill_buffer_convert_fwd(sarray->array[(*finalptr)+1],recount+1,Buffer);
+ Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
} else {
- Genome_fill_buffer_convert_rev(sarray->array[(*finalptr)+1],recount+1,Buffer);
+ Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
}
}
printf("%s\n",Buffer);
if (recount >= *nmatches) {
printf("querylength is %d\n",querylength);
printf("false negative: recount %d at %u after (*finalptr) does equal expected nmatches %d\n",
- recount,sarray->array[(*finalptr)+1],*nmatches);
+ recount,SA_i,*nmatches);
failp = true;
}
}
@@ -1380,6 +1992,9 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
}
+/* For fill_positions_all: ELT_VIRGIN -> ELT_FILLED */
+/* For fill_positions_filtered: ELT_VIRGIN -(1st call)-> ELT_UNSORTED -(2nd call)-> ELT_SORTED */
+typedef enum {ELT_VIRGIN, ELT_FILLED, ELT_UNSORTED, ELT_SORTED} Elt_status_T;
/* Simplified version of Spanningelt_T */
@@ -1387,33 +2002,90 @@ typedef struct Elt_T *Elt_T;
struct Elt_T {
int querystart;
int queryend;
- Univcoord_T nmatches;
+
+ int querystart_leftward; /* Modified when we extend matches leftward */
+ int queryend_leftward; /* Modified when we extend matches leftward */
+
+ int nmatches;
Sarrayptr_T initptr; /* in sarray */
Sarrayptr_T finalptr;
+ Sarrayptr_T nptr;
- Univcoord_T *positions_allocated;
+ Univcoord_T *positions_allocated; /* all or filtered positions needed */
Univcoord_T *positions;
+ int npositions_allocated;
int npositions; /* from goal to high */
- bool filledp; /* for development purposes */
+
+ bool temporaryp;
+ bool fillin_p; /* Created by oligoindex algorithm */
+
+ /* filled/sorted by Elt_fill_positions_filtered to speed up on multiple calls */
+ Univcoord_T *all_positions;
+ int n_all_positions;
+
+ Elt_status_T status;
};
+static void
+Elt_reset (Elt_T this) {
+ this->querystart_leftward = this->querystart;
+ this->queryend_leftward = this->queryend;
+ return;
+}
+
+
static Elt_T
-Elt_new (int querypos, int nmatches, Sarrayptr_T initptr, Sarrayptr_T finalptr) {
+Elt_new (int querypos, int nmatches, Sarrayptr_T initptr, Sarrayptr_T finalptr, bool temporaryp) {
Elt_T new = (Elt_T) MALLOC(sizeof(*new));
- new->querystart = querypos;
- new->queryend = querypos + nmatches - 1;
+ new->querystart = new->querystart_leftward = querypos;
+ new->queryend = new->queryend_leftward = querypos + nmatches - 1;
new->nmatches = nmatches;
new->initptr = initptr;
new->finalptr = finalptr;
+ new->nptr = new->finalptr - new->initptr + 1;
+ /* new->positions is a pointer that advances to goal */
new->positions_allocated = new->positions = (Univcoord_T *) NULL;
- new->npositions = 0;
+ new->npositions_allocated = new->npositions = 0;
+
+ new->temporaryp = temporaryp;
+ new->fillin_p = false;
+
+ new->all_positions = (Univcoord_T *) NULL;
+ new->n_all_positions = 0;
+
+ new->status = ELT_VIRGIN;
+
+ return new;
+}
+
+static Elt_T
+Elt_new_fillin (int querystart, int queryend, int indexsize, Univcoord_T left) {
+ Elt_T new = (Elt_T) MALLOC(sizeof(*new));
+
+ new->querystart = new->querystart_leftward = querystart;
+ new->queryend = new->queryend_leftward = queryend + indexsize - 1;
+ new->nmatches = new->queryend - querystart + 1;
+
+ new->initptr = 0;
+ new->finalptr = 0;
+ new->nptr = 0;
- new->filledp = false;
+ new->npositions = 1;
+ new->positions_allocated = new->positions = (Univcoord_T *) MALLOC(sizeof(Univcoord_T));
+ new->positions[0] = left;
+
+ new->temporaryp = true;
+ new->fillin_p = true;
+
+ new->all_positions = (Univcoord_T *) NULL;
+ new->n_all_positions = 0;
+
+ new->status = ELT_VIRGIN;
return new;
}
@@ -1431,9 +2103,16 @@ Elt_replace (Elt_T this, int querypos, int nmatches, Sarrayptr_T initptr, Sarray
FREE(this->positions_allocated);
}
this->positions_allocated = this->positions = (Univcoord_T *) NULL;
- this->npositions = 0;
+ this->npositions_allocated = this->npositions = 0;
+
- this->filledp = false;
+ if (this->all_positions != NULL) {
+ FREE(this->all_positions);
+ }
+ this->all_positions = (Univcoord_T *) NULL;
+ this->n_all_positions = 0;
+
+ this->status = ELT_VIRGIN;
return;
}
@@ -1445,6 +2124,9 @@ Elt_free (Elt_T *old) {
if ((*old)->positions_allocated != NULL) {
FREE((*old)->positions_allocated);
}
+ if ((*old)->all_positions != NULL) {
+ FREE((*old)->all_positions);
+ }
FREE(*old);
return;
}
@@ -1495,39 +2177,66 @@ Elt_querypos_descending_cmp (const void *a, const void *b) {
}
+static int
+Elt_extend_leftward (int *min_leftward, Elt_T elt, Compress_T query_compress,
+ bool plusp, int genestrand, bool first_read_p, int skip_left) {
+ int max_leftward, nmatches;
+ int i;
+
+ if (elt->npositions == 0) {
+ *min_leftward = 0;
+ return 0;
+ } else {
+ max_leftward = *min_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/elt->positions[0],
+ /*pos5*/0,/*pos3*/elt->querystart - skip_left,
+ plusp,genestrand,first_read_p);
+ for (i = 1; i < elt->npositions; i++) {
+ if ((nmatches = Genome_consecutive_matches_leftward(query_compress,/*left*/elt->positions[i],
+ /*pos5*/0,/*pos3*/elt->querystart,
+ plusp,genestrand,first_read_p)) > max_leftward) {
+ max_leftward = nmatches;
+ } else if (nmatches < *min_leftward) {
+ *min_leftward = nmatches;
+ }
+ }
+ return max_leftward;
+ }
+}
+
+
static void
Elt_fill_positions_all (Elt_T this, T sarray) {
Sarrayptr_T ptr;
Univcoord_T pos;
int i;
- debug(printf("Entering Elt_fill_positions_all on %p\n",this));
+ debug7(printf("Entering Elt_fill_positions_all on %p\n",this));
if (this->positions_allocated != NULL) {
- debug(printf(" positions_allocated is already non-NULL, so skipping\n"));
+ debug7(printf(" positions_allocated is already non-NULL, so skipping\n"));
+ /* Don't free positions_allocated. Use it. */
} else {
- this->npositions = this->finalptr - this->initptr + 1;
- debug(printf(" filling %d positions\n",this->npositions));
+ this->npositions_allocated = this->npositions = this->finalptr - this->initptr + 1;
+ debug7(printf(" filling %d positions\n",this->npositions));
if (this->nmatches == 0 || this->npositions > EXCESS_SARRAY_HITS) {
this->positions_allocated = this->positions = (Univcoord_T *) NULL;
- this->npositions = 0;
+ this->npositions_allocated = this->npositions = 0;
} else {
this->positions_allocated = this->positions = (Univcoord_T *) CALLOC(this->npositions,sizeof(Univcoord_T));
i = 0;
ptr = this->initptr;
while (ptr <= this->finalptr) {
- if ((pos = sarray->array[ptr++]) >= (Univcoord_T) this->querystart) {
+ if ((pos = csa_lookup(sarray,ptr++)) >= (Univcoord_T) this->querystart) {
this->positions[i++] = pos - this->querystart;
}
}
this->npositions = i;
qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
}
-
- this->filledp = true;
}
+ this->status = ELT_FILLED;
return;
}
@@ -1564,6 +2273,7 @@ static void
positions_compare (Univcoord_T *positions, int npositions,
Univcoord_T *positions_std, int npositions_std) {
int i;
+ bool problemp = false;
if (npositions != npositions_std) {
fprintf(stderr,"npositions %d != npositions_std %d\n",npositions,npositions_std);
@@ -1584,9 +2294,12 @@ positions_compare (Univcoord_T *positions, int npositions,
for (i = 0; i < npositions; i++) {
if (positions[i] != positions_std[i]) {
fprintf(stderr,"At %d, positions %u != positions_std %u\n",i,positions[i],positions_std[i]);
- abort();
+ problemp = true;
}
}
+ if (problemp == true) {
+ abort();
+ }
}
return;
@@ -1653,40 +2366,55 @@ fill_positions_std (int *npositions, Univcoord_T low_adj, Univcoord_T high_adj,
+/* Call fill_positions_filtered_first for first time, which is
+ linear in number of entries or O(n), then on second call, do sort with O(n*log n),
+ plus O(log n) for each additional call */
+
#ifdef HAVE_ALLOCA
#if defined(HAVE_SSSE3) && defined(HAVE_SSE2)
/* SSSE3 needed for _mm_shuffle_epi8 */
static void
-Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
- Compress_T query_compress, bool plusp, int genestrand, bool first_read_p) {
+fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
+ Compress_T query_compress, bool plusp, int genestrand, bool first_read_p) {
int nmatches;
- int i;
- Univcoord_T *array = sarray->array, low_adj, high_adj, value;
+ Univcoord_T low_adj, high_adj;
+#ifdef USE_CSA
+ Univcoord_T value3, value2, value1, value0, *all;
+ Sarrayptr_T ptri, stopi, endi;
+#else
+ Univcoord_T *array = sarray->array, value0;
+ Sarrayptr_T *array_stop, *array_end, *array_ptr;
+#endif
Univcoord_T *positions_temp;
#ifdef HAVE_64_BIT
UINT8 pointer;
#else
UINT4 pointer;
#endif
- Sarrayptr_T *array_stop, *array_end, *array_ptr;
Univcoord_T *out;
__m128i converted, adjusted, match;
- __m128i base, floor, ceiling, values, adj, p;
+ __m128i floor, ceiling, values, adj, p;
int matchbits;
-#ifdef REQUIRE_ALIGNMENT
+#ifdef USE_CSA
+#elif defined(REQUIRE_ALIGNMENT)
int n_prealign, k;
#endif
#ifndef USE_SHUFFLE_MASK
__m128i MASTER_CONTROL;
#endif
+#ifdef DEBUG7
+ int i;
+#endif
#ifdef DEBUG8
Univcoord_T *positions_std;
int npositions_std;
#endif
- debug7(printf("Entered Elt_fill_positions_filtered with goal %u, low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
+ debug(printf("Entered fill_positions_filtered_first with goal %u, low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
+ goal,low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
+ debug7(printf("Entered fill_positions_filtered_first with goal %u, low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
goal,low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
if (this->positions_allocated != NULL) {
@@ -1694,27 +2422,14 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
FREE(this->positions_allocated);
}
- if (this->nmatches == 0 || this->finalptr - this->initptr + 1 > EXCESS_SARRAY_HITS) {
- nmatches = Genome_consecutive_matches_rightward(query_compress,/*left*/goal,/*pos5*/this->querystart,
- /*pos3*/this->queryend + 1,plusp,genestrand,first_read_p);
- debug7(printf("rightward at goal %u from %d to %d shows %d matches (want %d)\n",goal,this->querystart,this->queryend,
- nmatches,this->queryend - this->querystart + 1));
- if (nmatches == this->queryend - this->querystart + 1) {
- /* Create a position that works */
- this->positions_allocated = this->positions = (Univcoord_T *) CALLOC(1,sizeof(Univcoord_T));
- this->positions[0] = goal;
- this->npositions = 1;
- } else {
- this->positions_allocated = this->positions = (Univcoord_T *) NULL;
- this->npositions = 0;
- }
+ if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->n_all_positions > EXCESS_SARRAY_HITS*/) {
+ this->all_positions = (Univcoord_T *) NULL;
} else {
-
-#ifdef DEBUG8
- positions_std = fill_positions_std(&npositions_std,/*low_adj*/low + this->querystart,
- /*high_adj*/high + this->querystart,
- this->initptr,this->finalptr,this->querystart,array);
+#ifdef USE_CSA
+ all = this->all_positions = (Univcoord_T *) CALLOC(this->n_all_positions,sizeof(Univcoord_T));
+#else
+ /* For non-CSA, done by calling procedure */
#endif
@@ -1722,16 +2437,17 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
low_adj = low + this->querystart;
high_adj = high + this->querystart;
-
- base = _mm_set1_epi32(2147483648); /* 2^31 */
+
floor = _mm_set1_epi32(low_adj - 1 - 2147483648);
ceiling = _mm_set1_epi32(high_adj + 1 - 2147483648);
adj = _mm_set1_epi32(this->querystart);
- this->npositions = 0;
+ this->npositions_allocated = this->npositions = 0;
+#ifdef USE_CSA
+ ptri = this->initptr;
+#elif defined(REQUIRE_ALIGNMENT)
array_ptr = &(array[this->initptr]);
-#ifdef REQUIRE_ALIGNMENT
/* Initial part */
#ifdef HAVE_64_BIT
n_prealign = ((16 - ((UINT8) array_ptr & 0xF))/4) & 0x3;
@@ -1750,23 +2466,48 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
*out++ = value - this->querystart;
}
}
-#endif
+#else
+ array_ptr = &(array[this->initptr]);
+#endif /* USE_CSA */
+
/* Aligned part */
+#ifdef USE_CSA
+ if (this->finalptr < 4) {
+ stopi = 0;
+ } else {
+ stopi = this->finalptr - 4;
+ }
+ endi = this->finalptr;
+#else
if (this->finalptr < 4) {
array_stop = &(array[0]);
} else {
array_stop = &(array[this->finalptr - 4]);
}
array_end = &(array[this->finalptr]);
+#endif
#ifndef USE_SHUFFLE_MASK
MASTER_CONTROL = _mm_setr_epi8(0x10, 0x12, 0x13, 0x12, 0x40, 0x68, 0x7C, 0x6B,
- 0x00, 0x80, 0xC0, 0xBC, 0x00, 0x00, 0x00, 0xC0);
+ 0x00, 0x80, 0xC0, 0xBC, 0x00, 0x00, 0x00, 0xC0);
#endif
- while (array_ptr < array_stop) {
-#ifdef REQUIRE_ALIGNMENT
+ while (
+#ifdef USE_CSA
+ ptri < stopi
+#else
+ array_ptr < array_stop
+#endif
+ ) {
+
+#ifdef USE_CSA
+ value3 = *all++ = csa_lookup(sarray,ptri);
+ value2 = *all++ = csa_lookup(sarray,ptri+1);
+ value1 = *all++ = csa_lookup(sarray,ptri+2);
+ value0 = *all++ = csa_lookup(sarray,ptri+3);
+ values = _mm_set_epi32(value3,value2,value1,value0);
+#elif defined(REQUIRE_ALIGNMENT)
values = _mm_load_si128((__m128i *) array_ptr);
#else
/* It looks like loadu is just as fast as load */
@@ -1774,7 +2515,7 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
#endif
debug7b(print_vector_uint(values));
- converted = _mm_sub_epi32(values,base);
+ converted = _mm_sub_epi32(values,epi32_convert);
/* match = _mm_andnot_si128(_mm_cmpgt_epi32(floor,converted),_mm_cmpgt_epi32(ceiling,converted)); -- This is off by 1 at floor */
match = _mm_and_si128(_mm_cmpgt_epi32(converted,floor),_mm_cmplt_epi32(converted,ceiling));
debug7b(print_vector_hex(match));
@@ -1794,33 +2535,40 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
debug7b(printf("matchbits: %08X (%d ones)\n",matchbits,_popcnt32(matchbits)));
#elif defined HAVE_MM_POPCNT
out += _mm_popcnt_u32(matchbits);
- debug7b(printf("matchbits: %08X (%d ones)\n",matchbits,_popcnt32(matchbits)));
+ debug7b(printf("matchbits: %08X (%d ones)\n",matchbits,_mm_popcnt_u32(matchbits)));
#else
out += __builtin_popcount(matchbits);
debug7b(printf("matchbits: %08X (%d ones)\n",matchbits,__builtin_popcount(matchbits)));
#endif
}
+#ifdef USE_CSA
+ ptri += 4;
+#else
array_ptr += 4;
+#endif
}
/* Partial block at end; do scalar */
debug7(printf("\nFinal part:\n"));
+#ifdef USE_CSA
+ while (ptri <= endi) {
+ if ((value0 = *all++ = csa_lookup(sarray,ptri++)) >= low_adj && value0 <= high_adj) {
+ *out++ = value0 - this->querystart;
+ }
+ }
+#else
while (array_ptr <= array_end) {
- if ((value = *array_ptr++) >= low_adj && value <= high_adj) {
- *out++ = value - this->querystart;
+ if ((value0 = *array_ptr++) >= low_adj && value0 <= high_adj) {
+ *out++ = value0 - this->querystart;
}
}
+#endif
- this->npositions = out - positions_temp;
-
+ this->npositions_allocated = this->npositions = out - positions_temp;
debug7(printf("SIMD method found %d positions\n",this->npositions));
-#ifdef DEBUG8
- positions_compare(positions_temp,this->npositions,positions_std,npositions_std);
- FREE(positions_std);
-#endif
- /* Copy the positions from temp */
+ /* Copy the positions into heap from temp in stack */
if (this->npositions == 0) {
this->positions_allocated = this->positions = (Univcoord_T *) NULL;
} else {
@@ -1830,37 +2578,50 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
/* Need to copy positions before the goal */
this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T));
memcpy(this->positions,positions_temp,this->npositions * sizeof(Univcoord_T));
+#ifdef DEBUG7
+ for (i = 0; i < this->npositions; i++) {
+ printf("%u\n",this->positions[i]);
+ }
+#endif
+#if 0
+ /* Not sure why we were doing this. We will find collinear set of diagonals later. */
/* Advance pointer to goal (note: do not want goal_adj, since we have already subtracted this->querystart) */
/* Have tested positions[i] <= goal, but want positions[-1] to be < goal, or positions[0] >= goal */
/* ? Replace with a binary search */
i = 0;
while (i < this->npositions && positions_temp[i] < goal) {
- debug7(printf("Skipping position %u < goal %u\n",positions_temp[i],goal));
+ debug7(printf("1 Skipping position %u (%u) < goal %u (%u)\n",
+ positions_temp[i],positions_temp[i] - chroffset,goal,goal - chroffset));
i++;
}
this->positions += i;
this->npositions -= i;
debug7(printf("Remaining: %d positions\n",this->npositions));
+#endif
}
FREEA(positions_temp);
}
- this->filledp = true;
-
return;
}
+
#else
+/* Missing SSSE3 or SSE2 */
static void
-Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
- Compress_T query_compress, bool plusp, int genestrand, bool first_read_p) {
+fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
+ Compress_T query_compress, bool plusp, int genestrand, bool first_read_p) {
Sarrayptr_T ptr, lastptr;
int nmatches;
int i;
- Univcoord_T *array = sarray->array, low_adj, high_adj, value;
+ Univcoord_T low_adj, high_adj;
+ Univcoord_T value3, value2, value1, value0;
+#ifndef USE_CSA
+ Univcoord_T *array = sarray->array;
+#endif
Univcoord_T *positions_temp;
#ifdef HAVE_SSE2
#ifdef HAVE_64_BIT
@@ -1868,16 +2629,12 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
#else
UINT4 pointer;
#endif
- __m128i base, floor, ceiling, values, compare;
+ __m128i floor, ceiling, values, compare;
int n_prealign, k;
#endif
-#ifdef DEBUG8
- Univcoord_T *positions_std;
- int npositions_std;
-#endif
- debug7(printf("Entered Elt_fill_positions_filtered with goal %u, low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
+ debug7(printf("Entered fill_positions_filtered_first with goal %u, low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
goal,low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
if (this->positions_allocated != NULL) {
@@ -1885,58 +2642,46 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
FREE(this->positions_allocated);
}
- if (this->nmatches == 0 || this->finalptr - this->initptr + 1 > EXCESS_SARRAY_HITS) {
- nmatches = Genome_consecutive_matches_rightward(query_compress,/*left*/goal,/*pos5*/this->querystart,
- /*pos3*/this->queryend + 1,plusp,genestrand,first_read_p);
- debug7(printf("rightward at goal %u from %d to %d shows %d matches (want %d)\n",goal,this->querystart,this->queryend,
- nmatches,this->queryend - this->querystart + 1));
- if (nmatches == this->queryend - this->querystart + 1) {
- /* Create a position that works */
- this->positions_allocated = this->positions = (Univcoord_T *) CALLOC(1,sizeof(Univcoord_T));
- this->positions[0] = goal;
- this->npositions = 1;
- } else {
- this->positions_allocated = this->positions = (Univcoord_T *) NULL;
- this->npositions = 0;
- }
+ if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->n_all_positions > EXCESS_SARRAY_HITS*/) {
+ this->all_positions = (Univcoord_T *) NULL;
} else {
-
-#ifdef DEBUG8
- positions_std = fill_positions_std(&npositions_std,/*low_adj*/low + this->querystart,
- /*high_adj*/high + this->querystart,
- this->initptr,this->finalptr,this->querystart,array);
+#ifdef USE_CSA
+ all = this->all_positions = (Univcoord_T *) CALLOC(this->n_all_positions,sizeof(Univcoord_T));
+#else
+ /* For non-CSA, done by calling procedure */
#endif
-#ifdef HAVE_SSE2
- base = _mm_set1_epi32(2147483648); /* 2^31 */
-#endif
-
positions_temp = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
low_adj = low + this->querystart;
high_adj = high + this->querystart;
- this->npositions = 0;
+ this->npositions_allocated = this->npositions = 0;
ptr = this->initptr;
#ifdef HAVE_SSE2
if (ptr + 3 > this->finalptr) { /* ptr + 4 > (this->finalptr + 1) */
/* Handle in normal manner */
debug7(printf("Small batch, because %u + 3 <= %u\n",ptr,this->finalptr));
while (ptr <= this->finalptr) {
- debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value = array[ptr++]) < low_adj) {
+ debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
+ if ((value0 =
+#ifdef USE_CSA
+ *all++ =
+#endif
+ csa_lookup(sarray,ptr++)) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- positions_temp[this->npositions++] = value - this->querystart;
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ positions_temp[this->npositions++] = value0 - this->querystart;
}
}
} else {
+#ifndef USE_CSA
#ifdef HAVE_64_BIT
pointer = (UINT8) &(array[ptr]);
#else
@@ -1950,15 +2695,16 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
debug7(printf("Initial part:\n"));
for (k = 0; k < n_prealign; k++) {
debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value = array[ptr++]) < low_adj) {
+ if ((value0 = array[ptr++]) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- positions_temp[this->npositions++] = value - this->querystart;
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ positions_temp[this->npositions++] = value0 - this->querystart;
}
}
+#endif
/* Aligned part */
debug7(printf("\nAligned part:\n"));
@@ -1967,40 +2713,98 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
floor = _mm_set1_epi32(low_adj - 1 - 2147483648);
ceiling = _mm_set1_epi32(high_adj + 1 - 2147483648);
while (ptr + 3 <= this->finalptr) { /* ptr + 4 < this->finalptr + 1 */
+#ifdef USE_CSA
+ value3 = *all++ = csa_lookup(sarray,ptr);
+ value2 = *all++ = csa_lookup(sarray,ptr+1);
+ value1 = *all++ = csa_lookup(sarray,ptr+2);
+ value0 = *all++ = csa_lookup(sarray,ptr+3);
+ values = _mm_set_epi32(value3,value2,value1,value0);
+#else
values = _mm_load_si128((__m128i *) &(array[ptr]));
+#endif
debug7a(print_vector_looking(values,low_adj,high_adj));
- values = _mm_sub_epi32(values,base);
+ values = _mm_sub_epi32(values,epi32_convert);
compare = _mm_and_si128(_mm_cmpgt_epi32(values,floor),_mm_cmplt_epi32(values,ceiling));
if (/*cmp*/_mm_movemask_epi8(compare) == 0x0000) {
/* All results are false, indicating no values between low_adj and high_adj (most common case) */
ptr += 4;
} else {
- for (k = 0; k < 4; k++) {
- if ((value = array[ptr++]) < low_adj) {
- /* Skip */
- debug7(printf("Skipping position %u < low %u\n",value,low_adj));
- } else if (value > high_adj) {
- /* Skip */
- debug7(printf("Skipping position %u > high %u\n",value,high_adj));
- } else {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- positions_temp[this->npositions++] = value - this->querystart;
- }
+#ifndef USE_CSA
+ value3 = array[ptr++];
+#endif
+ if (value3 < low_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u < low %u\n",value3,low_adj));
+ } else if (value3 > high_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u > high %u\n",value3,high_adj));
+ } else {
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value3,low_adj,high_adj));
+ positions_temp[this->npositions++] = value3 - this->querystart;
+ }
+
+#ifndef USE_CSA
+ value2 = array[ptr++];
+#endif
+ if (value2 < low_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u < low %u\n",value2,low_adj));
+ } else if (value2 > high_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u > high %u\n",value2,high_adj));
+ } else {
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value2,low_adj,high_adj));
+ positions_temp[this->npositions++] = value2 - this->querystart;
+ }
+
+#ifndef USE_CSA
+ value1 = array[ptr++];
+#endif
+ if (value1 < low_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u < low %u\n",value1,low_adj));
+ } else if (value1 > high_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u > high %u\n",value1,high_adj));
+ } else {
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value1,low_adj,high_adj));
+ positions_temp[this->npositions++] = value1 - this->querystart;
+ }
+
+#ifndef USE_CSA
+ value0 = array[ptr++];
+#endif
+ if (value0 < low_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u < low %u\n",value0,low_adj));
+ } else if (value0 > high_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u > high %u\n",value0,high_adj));
+ } else {
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ positions_temp[this->npositions++] = value0 - this->querystart;
}
+#ifdef USE_CSA
+ ptr += 4;
+#endif
}
}
/* Final part */
debug7(printf("\nFinal part:\n"));
while (ptr <= this->finalptr) {
- debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value = array[ptr++]) < low_adj) {
+ debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
+ if ((value0 =
+#ifdef USE_CSA
+ *all++ =
+#endif
+ csa_lookup(sarray,ptr++)) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- positions_temp[this->npositions++] = value - this->querystart;
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ positions_temp[this->npositions++] = value0 - this->querystart;
}
}
}
@@ -2008,23 +2812,23 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
#else
while (ptr <= this->finalptr) {
- debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value = array[ptr++]) < low_adj) {
+ debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
+ if ((value0 =
+#ifdef USE_CSA
+ *all++ =
+#endif
+ csa_lookup(sarray,ptr++)) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- positions_temp[this->npositions++] = value - this->querystart;
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ positions_temp[this->npositions++] = value0 - this->querystart;
}
}
#endif
debug7(printf("SIMD method found %d positions\n",this->npositions));
-#ifdef DEBUG8
- positions_compare(positions_temp,this->npositions,positions_std,npositions_std);
- FREE(positions_std);
-#endif
/* Copy the positions from temp */
if (this->npositions == 0) {
@@ -2037,38 +2841,46 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T));
memcpy(this->positions,positions_temp,this->npositions * sizeof(Univcoord_T));
+#if 0
+ /* Not sure why we were doing this. We will find collinear set of diagonals later. */
/* Advance pointer to goal (note: do not want goal_adj, since we have already subtracted this->querystart) */
/* Have tested positions[i] <= goal, but want positions[-1] to be < goal, or positions[0] >= goal */
/* ? Replace with a binary search */
i = 0;
while (i < this->npositions && positions_temp[i] < goal) {
- debug7(printf("Skipping position %u < goal %u\n",positions_temp[i],goal));
+ debug7(printf("2 Skipping position %u < goal %u\n",positions_temp[i] - chroffset,goal - chroffset));
i++;
}
this->positions += i;
this->npositions -= i;
debug7(printf("Remaining: %d positions\n",this->npositions));
+#endif
}
FREEA(positions_temp);
}
- this->filledp = true;
-
return;
}
#endif
#else
+/* Non-ALLOCA version */
static void
-Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
- Compress_T query_compress, bool plusp, int genestrand, bool first_read_p) {
+fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
+ Compress_T query_compress, bool plusp, int genestrand, bool first_read_p) {
Sarrayptr_T ptr, lastptr;
int nmatches;
int i;
- Univcoord_T *array = sarray->array, low_adj, high_adj, value;
+ Univcoord_T low_adj, high_adj;
+ Univcoord_T value3, value2, value1, value0;
+#ifdef USE_CSA
+ Sarrayptr_T stopi, endi, ptri, *all;
+#else
+ Univcoord_T *array = sarray->array;
+#endif
Univcoord_T *more_positions;
#ifdef HAVE_SSE2
#ifdef HAVE_64_BIT
@@ -2076,16 +2888,12 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
#else
UINT4 pointer;
#endif
- __m128i base, floor, ceiling, values, compare;
+ __m128i floor, ceiling, values, compare;
int n_prealign, k;
#endif
-#ifdef DEBUG8
- Univcoord_T *positions_std;
- int npositions_std;
-#endif
- debug7(printf("Entered Elt_fill_positions_filtered with goal %u, low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
+ debug7(printf("Entered fill_positions_filtered_first with goal %u, low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
goal,low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
if (this->positions_allocated != NULL) {
@@ -2093,62 +2901,51 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
FREE(this->positions_allocated);
}
- if (this->nmatches == 0 || this->finalptr - this->initptr + 1 > EXCESS_SARRAY_HITS) {
- nmatches = Genome_consecutive_matches_rightward(query_compress,/*left*/goal,/*pos5*/this->querystart,
- /*pos3*/this->queryend + 1,plusp,genestrand,first_read_p);
- debug7(printf("rightward at goal %u from %d to %d shows %d matches (want %d)\n",goal,this->querystart,this->queryend,
- nmatches,this->queryend - this->querystart + 1));
- if (nmatches == this->queryend - this->querystart + 1) {
- /* Create a position that works */
- this->positions_allocated = this->positions = (Univcoord_T *) CALLOC(1,sizeof(Univcoord_T));
- this->positions[0] = goal;
- this->npositions = 1;
- } else {
- this->positions_allocated = this->positions = (Univcoord_T *) NULL;
- this->npositions = 0;
- }
- } else {
+ if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->n_all_positions > EXCESS_SARRAY_HITS*/) {
+ this->all_positions = (Univcoord_T *) NULL;
-#ifdef DEBUG8
- positions_std = fill_positions_std(&npositions_std,/*low_adj*/low + this->querystart,
- /*high_adj*/high + this->querystart,
- this->initptr,this->finalptr,this->querystart,array);
+ } else {
+#ifdef USE_CSA
+ all = this->all_positions = (Univcoord_T *) CALLOC(this->n_all_positions,sizeof(Univcoord_T));
+#else
+ /* For non-CSA, done by calling procedure */
#endif
-#ifdef HAVE_SSE2
- base = _mm_set1_epi32(2147483648); /* 2^31 */
-#endif
-
/* Guess at allocation size */
this->positions_allocated = this->positions = (Univcoord_T *) CALLOC(GUESS_ALLOCATION,sizeof(Univcoord_T));
low_adj = low + this->querystart;
high_adj = high + this->querystart;
- this->npositions = 0;
+ this->npositions_allocated = this->npositions = 0;
ptr = this->initptr;
#ifdef HAVE_SSE2
if (ptr + 3 > this->finalptr) { /* ptr + 4 > (this->finalptr + 1) */
/* Handle in normal manner */
debug7(printf("Small batch, because %u + 3 <= %u\n",ptr,this->finalptr));
while (ptr <= this->finalptr) {
- debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value = array[ptr++]) < low_adj) {
+ debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
+ if ((value0 =
+#ifdef USE_CSA
+ *all++ =
+#endif
+ csa_lookup(sarray,ptr++)) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else if (this->npositions < GUESS_ALLOCATION) {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- this->positions[this->npositions++] = value - this->querystart;
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ this->positions[this->npositions++] = value0 - this->querystart;
} else {
- debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value,low_adj,high_adj));
+ debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
this->npositions++;
lastptr = ptr; /* saves us from going through the entire sarray below */
}
}
} else {
+#ifndef USE_CSA
#ifdef HAVE_64_BIT
pointer = (UINT8) &(array[ptr]);
#else
@@ -2162,19 +2959,20 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
debug7(printf("Initial part:\n"));
for (k = 0; k < n_prealign; k++) {
debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value = array[ptr++]) < low_adj) {
+ if ((value0 = array[ptr++]) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else if (this->npositions < GUESS_ALLOCATION) {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- this->positions[this->npositions++] = value - this->querystart;
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ this->positions[this->npositions++] = value0 - this->querystart;
} else {
- debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value,low_adj,high_adj));
+ debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
this->npositions++;
lastptr = ptr; /* saves us from going through the entire sarray below */
}
}
+#endif
/* Aligned part */
debug7(printf("\nAligned part:\n"));
@@ -2183,46 +2981,116 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
floor = _mm_set1_epi32(low_adj - 1 - 2147483648);
ceiling = _mm_set1_epi32(high_adj + 1 - 2147483648);
while (ptr + 3 <= this->finalptr) { /* ptr + 4 < this->finalptr + 1 */
+#ifdef USE_CSA
+ value3 = *all++ = csa_lookup(sarray,ptr);
+ value2 = *all++ = csa_lookup(sarray,ptr+1);
+ value1 = *all++ = csa_lookup(sarray,ptr+2);
+ value0 = *all++ = csa_lookup(sarray,ptr+3);
+ values = _mm_set_epi32(value3,value2,value1,value0);
+#else
values = _mm_load_si128((__m128i *) &(array[ptr]));
+#endif
debug7a(print_vector_looking(values,low_adj,high_adj));
- values = _mm_sub_epi32(values,base);
+ values = _mm_sub_epi32(values,epi32_convert);
compare = _mm_and_si128(_mm_cmpgt_epi32(values,floor),_mm_cmplt_epi32(values,ceiling));
if (/*cmp*/_mm_movemask_epi8(compare) == 0x0000) {
/* All results are false, indicating no values between low_adj and high_adj (most common case) */
ptr += 4;
} else {
- for (k = 0; k < 4; k++) {
- if ((value = array[ptr++]) < low_adj) {
- /* Skip */
- debug7(printf("Skipping position %u < low %u\n",value,low_adj));
- } else if (value > high_adj) {
- /* Skip */
- debug7(printf("Skipping position %u > high %u\n",value,high_adj));
- } else if (this->npositions < GUESS_ALLOCATION) {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- this->positions[this->npositions++] = value - this->querystart;
- } else {
- debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value,low_adj,high_adj));
- this->npositions++;
- lastptr = ptr; /* saves us from going through the entire sarray below */
- }
+#ifndef USE_CSA
+ value3 = array[ptr++];
+#endif
+ if (value3 < low_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u < low %u\n",value3,low_adj));
+ } else if (value3 > high_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u > high %u\n",value3,high_adj));
+ } else if (this->npositions < GUESS_ALLOCATION) {
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value3,low_adj,high_adj));
+ this->positions[this->npositions++] = value3 - this->querystart;
+ } else {
+ debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value3,low_adj,high_adj));
+ this->npositions++;
+ lastptr = ptr; /* saves us from going through the entire sarray below */
+ }
+
+#ifndef USE_CSA
+ value2 = array[ptr++];
+#endif
+ if (value2 < low_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u < low %u\n",value2,low_adj));
+ } else if (value2 > high_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u > high %u\n",value2,high_adj));
+ } else if (this->npositions < GUESS_ALLOCATION) {
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value2,low_adj,high_adj));
+ this->positions[this->npositions++] = value2 - this->querystart;
+ } else {
+ debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value2,low_adj,high_adj));
+ this->npositions++;
+ lastptr = ptr; /* saves us from going through the entire sarray below */
+ }
+
+#ifndef USE_CSA
+ value1 = array[ptr++];
+#endif
+ if (value1 < low_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u < low %u\n",value1,low_adj));
+ } else if (value1 > high_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u > high %u\n",value1,high_adj));
+ } else if (this->npositions < GUESS_ALLOCATION) {
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value1,low_adj,high_adj));
+ this->positions[this->npositions++] = value1 - this->querystart;
+ } else {
+ debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value1,low_adj,high_adj));
+ this->npositions++;
+ lastptr = ptr; /* saves us from going through the entire sarray below */
+ }
+
+#ifndef USE_CSA
+ value0 = array[ptr++];
+#endif
+ if (value0 < low_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u < low %u\n",value0,low_adj));
+ } else if (value0 > high_adj) {
+ /* Skip */
+ debug7(printf("Skipping position %u > high %u\n",value0,high_adj));
+ } else if (this->npositions < GUESS_ALLOCATION) {
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ this->positions[this->npositions++] = value0 - this->querystart;
+ } else {
+ debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
+ this->npositions++;
+ lastptr = ptr; /* saves us from going through the entire sarray below */
}
+#ifdef USE_CSA
+ ptr += 4;
+#endif
}
}
/* Final part */
debug7(printf("\nFinal part:\n"));
while (ptr <= this->finalptr) {
- debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value = array[ptr++]) < low_adj) {
+ debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
+ if ((value0 =
+#ifdef USE_CSA
+ *all++ =
+#endif
+ csa_lookup(sarray,ptr++)) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else if (this->npositions < GUESS_ALLOCATION) {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- this->positions[this->npositions++] = value - this->querystart;
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ this->positions[this->npositions++] = value0 - this->querystart;
} else {
- debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value,low_adj,high_adj));
+ debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
this->npositions++;
lastptr = ptr; /* saves us from going through the entire sarray below */
}
@@ -2232,16 +3100,20 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
#else
while (ptr <= this->finalptr) {
- debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value = array[ptr++]) < low_adj) {
+ debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
+ if ((value0 =
+#ifdef USE_CSA
+ *all++ =
+#endif
+ csa_lookup(sarray,ptr++)) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else if (this->npositions < GUESS_ALLOCATION) {
- debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
- this->positions[this->npositions++] = value - this->querystart;
+ debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
+ this->positions[this->npositions++] = value0 - this->querystart;
} else {
- debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value,low_adj,high_adj));
+ debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
this->npositions++;
lastptr = ptr; /* saves us from going through the entire sarray below */
}
@@ -2263,16 +3135,17 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
#ifdef HAVE_SSE2
if (this->initptr + 4 < ptr) {
while (i < this->npositions) {
- if ((value = array[--ptr]) < low_adj) {
+ if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else {
- this->positions[i++] = value - this->querystart;
+ this->positions[i++] = value0 - this->querystart;
}
}
} else {
+#ifndef USE_CSA
#ifdef HAVE_64_BIT
pointer = (UINT8) &(array[ptr]);
#else
@@ -2284,44 +3157,91 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
/* Initial part */
while (i < this->npositions) {
- if ((value = array[--ptr]) < low_adj) {
+ if ((value0 = array[--ptr]) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else {
- this->positions[i++] = value - this->querystart;
+ this->positions[i++] = value0 - this->querystart;
}
}
+#endif
/* Aligned part */
while (i < this->npositions && this->initptr + 4 < ptr) {
+#ifdef USE_CSA
+ value3 = csa_lookup(sarray,ptr-4);
+ value2 = csa_lookup(sarray,ptr-3);
+ value1 = csa_lookup(sarray,ptr-2);
+ value0 = csa_lookup(sarray,ptr-1);
+ values = _mm_set_epi32(value3,value2,value1,value0);
+#else
values = _mm_load_si128((__m128i *) &(array[ptr-4]));
- values = _mm_sub_epi32(values,base);
+#endif
+ values = _mm_sub_epi32(values,epi32_convert);
compare = _mm_and_si128(_mm_cmpgt_epi32(values,floor),_mm_cmplt_epi32(values,ceiling));
if (/*cmp*/_mm_movemask_epi8(compare) == 0x0000) {
/* All results are false, indicating no values between low_adj and high_adj (most common case) */
ptr -= 4;
} else {
- for (k = 0; k < 4; k++) {
- if ((value = array[--ptr]) < low_adj) {
- /* Skip */
- } else if (value > high_adj) {
- /* Skip */
- } else {
- this->positions[i++] = value - this->querystart;
- }
+#ifndef USE_CSA
+ value0 = array[--ptr];
+#endif
+ if (value0 < low_adj) {
+ /* Skip */
+ } else if (value0 > high_adj) {
+ /* Skip */
+ } else {
+ this->positions[i++] = value0 - this->querystart;
+ }
+
+#ifndef USE_CSA
+ value1 = array[--ptr];
+#endif
+ if (value1 < low_adj) {
+ /* Skip */
+ } else if (value1 > high_adj) {
+ /* Skip */
+ } else {
+ this->positions[i++] = value1 - this->querystart;
+ }
+
+#ifndef USE_CSA
+ value2 = array[--ptr];
+#endif
+ if (value2 < low_adj) {
+ /* Skip */
+ } else if (value2 > high_adj) {
+ /* Skip */
+ } else {
+ this->positions[i++] = value2 - this->querystart;
+ }
+
+#ifndef USE_CSA
+ value3 = array[--ptr];
+#endif
+ if (value3 < low_adj) {
+ /* Skip */
+ } else if (value3 > high_adj) {
+ /* Skip */
+ } else {
+ this->positions[i++] = value3 - this->querystart;
}
+
+#ifdef USE_CSA
+ ptr -= 4;
+#endif
}
- }
+ }
/* Last part */
while (i < this->npositions) {
- if ((value = array[--ptr]) < low_adj) {
+ if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else {
- this->positions[i++] = value - this->querystart;
+ this->positions[i++] = value0 - this->querystart;
}
}
}
@@ -2329,90 +3249,42 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
#else
while (i < this->npositions) {
- if ((value = array[--ptr]) < low_adj) {
+ if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
/* Skip */
- } else if (value > high_adj) {
+ } else if (value0 > high_adj) {
/* Skip */
} else {
- this->positions[i++] = value - this->querystart;
+ this->positions[i++] = value0 - this->querystart;
}
}
#endif
}
-#ifdef DEBUG8
- positions_compare(this->positions,this->npositions,positions_std,npositions_std);
- FREE(positions_std);
-#endif
-
qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
debug7(printf("Sorting %d positions\n",this->npositions));
+#if 0
+ /* Not sure why we were doing this. We will find collinear set of diagonals later. */
/* Advance pointer to goal (note: do not want goal_adj, since we have already subtracted this->querystart) */
/* Have tested positions[i] <= goal, but want positions[-1] to be < goal, or positions[0] >= goal */
i = 0;
while (i < this->npositions && this->positions[i] < goal) {
- debug7(printf("Skipping position %u < goal %u\n",this->positions[i],goal));
+ debug7(printf("3 Skipping position %u < goal %u\n",this->positions[i] - chroffset,goal - chroffset));
i++;
}
this->positions += i;
this->npositions -= i;
debug7(printf("Remaining: %d positions\n",this->npositions));
+#endif
}
- this->filledp = true;
-
return;
}
#endif
-
-static void
-Elt_dump_list (List_T list) {
- List_T p;
- Elt_T elt;
- int maxn = 0, k;
-
- for (p = list; p != NULL; p = p->rest) {
- elt = (Elt_T) p->first;
- if (elt->npositions > maxn) {
- maxn = elt->npositions;
- }
- }
-
- for (k = 0; k < maxn /* && k < 100 */; k++) {
- for (p = list; p != NULL; p = p->rest) {
- elt = (Elt_T) p->first;
- if (k >= elt->npositions) {
- printf("\t");
- } else {
- printf("%d..%d:%u\t",elt->querystart,elt->queryend,elt->positions[k]);
- }
- }
- printf("\n");
- }
- printf("\n");
-
- return;
-}
-
-static void
-Elt_dump (Elt_T elt) {
- int k;
-
- printf("Elt with %d positions:\n",elt->npositions);
- for (k = 0; k < elt->npositions; k++) {
- printf(" %d..%d:%u\n",elt->querystart,elt->queryend,elt->positions[k]);
- }
- printf("\n");
-
- return;
-}
-
-
-
+/* ? Returns first entry that is >= goal */
static int
binary_search (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) {
int middlei;
@@ -2423,7 +3295,7 @@ binary_search (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) {
middlei = lowi + ((highi - lowi) / 2);
debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
lowi,positions[lowi],middlei,positions[middlei],
- highi,positions[highi],goal));
+ highi-1,positions[highi-1],goal));
if (goal < positions[middlei]) {
highi = middlei;
} else if (goal > positions[middlei]) {
@@ -2439,302 +3311,226 @@ binary_search (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) {
}
-#define add_bounded(x,plusterm,highbound) ((x + (plusterm) >= highbound) ? (highbound - 1) : x + (plusterm))
-#define subtract_bounded(x,minusterm,lowbound) ((x < lowbound + (minusterm)) ? lowbound : x - (minusterm))
+/* Used upon second call to Elt_fill_positions_filtered */
+static void
+fill_positions_filtered_again (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
+ Compress_T query_compress, bool plusp, int genestrand, bool first_read_p) {
+ int lowi, highi, i;
-/* Taken from stage1hr.c identify_multimiss_iter */
-static bool
-extend_rightward (Univcoord_T goal, Univcoord_T chroffset, Univcoord_T chrhigh,
- List_T set, Compress_T query_compress,
- T sarray, bool plusp, int genestrand, bool first_read_p, int best_queryend) {
- Elt_T elt;
- Univcoord_T low, high;
-
- debug7(printf("extend_rightward, with goal %u\n",goal));
- for ( ; set /* != NULL */; set = set->rest) {
- debug7(Elt_dump_list(set));
- elt = (Elt_T) set->first;
+ debug(printf("Entered fill_positions_filtered_again with goal %u, low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
+ goal,low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
- debug7(printf("remaining elts %d: ",List_length(set)));
- debug7(printf("%d..%d\n",elt->querystart,elt->queryend));
- if (elt->querystart > best_queryend) {
- /* Allow for deletion with higher goal */
- low = subtract_bounded(goal,/*minusterm*/max_insertionlen,chroffset);
- high = add_bounded(goal,/*plusterm*/overall_max_distance,chrhigh);
- Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
- debug7(printf("Allow for deletion with higher goal: %d positions\n",elt->npositions));
-
- if (elt->npositions <= 0) {
- /* List is empty, so one more miss seen. */
- debug7(printf(" positions empty, so not spanning\n"));
- return false;
-
- } else if (*elt->positions > high) {
- /* Already advanced past goal, so one more miss seen. */
- debug7(printf(" %u advanced past goal %u + %d, so not spanning\n",*elt->positions,goal,overall_max_distance));
- return false;
+ if (this->positions_allocated != NULL) {
+ /* Filled from a previous call */
+ FREE(this->positions_allocated);
+ }
- } else {
- /* Found goal. Advance past goal and continue with loop. */
- debug7(printf(" advancing\n"));
- ++elt->positions;
- --elt->npositions;
- /* continue */
- }
- } else {
- /* Allow for deletion with lower goal */
- low = subtract_bounded(goal,/*minusterm*/overall_max_distance,chroffset);
- high = add_bounded(goal,/*plusterm*/max_insertionlen,chrhigh);
- Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
- debug7(printf("Allow for deletion with lower goal: %d positions\n",elt->npositions));
-
- if (elt->npositions <= 0) {
- /* List is empty, so test previous one only, which must exist
- since positions had at least one entry. */
- if (elt->positions[-1] >= low) {
- /* Found goal with deletion */
- debug7(printf(" possible deletion, continuing\n"));
- /* continue */
- } else {
- debug7(printf(" previous %u before goal %u - %d, so not spanning\n",elt->positions[-1],goal,shortsplicedist));
- return false;
- }
-
- } else if (elt->positions == elt->positions_allocated) {
- /* List is at beginning, so test current one only, not the previous one */
- if (*elt->positions > goal) {
- /* Already advanced past goal, so one more miss seen. */
- debug7(printf(" %u advanced past goal_high %u, so not spanning\n",*elt->positions,goal));
- return false;
- } else {
- /* Found goal. Advance past goal and continue with loop. */
- debug7(printf(" advancing\n"));
- ++elt->positions;
- --elt->npositions;
- /* continue */
- }
+ if (this->n_all_positions == 0) {
+ this->positions_allocated = this->positions = (Univcoord_T *) NULL;
+ this->npositions_allocated = this->npositions = 0;
- } else {
- /* Test both current one (for goal) and previous one (for deletion) */
- if (*elt->positions == goal) {
- /* Found goal. Advance past goal and continue with loop. */
- debug7(printf(" advancing\n"));
- ++elt->positions;
- --elt->npositions;
- /* continue */
-
- } else if (elt->positions[-1] >= low) {
- /* Found goal with deletion */
- debug7(printf(" possible deletion, continuing\n"));
- /* continue */
+ } else {
+ /* low_adj and high_adj are inclusive */
+ lowi = binary_search(/*lowi*/0,/*highi*/this->n_all_positions,this->all_positions,/*goal*/low + this->querystart);
+ highi = binary_search(lowi,/*highi*/this->n_all_positions,this->all_positions,/*goal*/high + this->querystart + 1) - 1;
+ if ((this->npositions_allocated = this->npositions = highi - lowi + 1) == 0) {
+ this->positions_allocated = this->positions = (Univcoord_T *) NULL;
- } else {
- debug7(printf(" %u advanced past goal %u, and previous %u before goal %u - %d, so not spanning\n",
- *elt->positions,goal,elt->positions[-1],goal,overall_max_distance));
- return false;
- }
+ } else {
+ this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T));
+ memcpy(this->positions,&(this->all_positions[lowi]),this->npositions*sizeof(Univcoord_T));
+ for (i = 0; i < this->npositions; i++) {
+ this->positions[i] -= this->querystart;
}
}
}
- debug7(printf("Returning true\n"));
- return true;
+ return;
}
-/* Taken from stage1hr.c identify_multimiss_iter */
-static bool
-extend_leftward (Univcoord_T goal, Univcoord_T chroffset, Univcoord_T chrhigh,
- List_T set, char *queryptr, Compress_T query_compress,
- T sarray, bool plusp, int genestrand, bool first_read_p, char conversion[],
- int best_querystart, int best_queryend) {
- Elt_T elt;
- UINT4 nmatches;
- Sarrayptr_T initptr, finalptr;
- bool successp;
- UINT4 queryend, querypos;
- Univcoord_T low, high;
+static void
+Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
+ Compress_T query_compress, bool plusp, int genestrand, bool first_read_p,
+ bool multiplep) {
+ int nmatches;
+#ifdef DEBUG8
+ Univcoord_T *positions_std;
+ int npositions_std;
+#endif
+ int i;
- debug7(printf("extend_leftward, plusp %d, with goal %u, querystart..queryend %d..%d\n",
- plusp,goal,best_querystart,best_queryend));
- queryend = best_querystart - 2;
+ if (this->nmatches == 0 || this->finalptr - this->initptr + 1 > EXCESS_SARRAY_HITS) {
+ /* Check for an extension */
+ nmatches = Genome_consecutive_matches_rightward(query_compress,/*left*/goal,/*pos5*/this->querystart,
+ /*pos3*/this->queryend + 1,plusp,genestrand,first_read_p);
+ debug7(printf("rightward at goal %u from %d to %d shows %d matches (want %d)\n",goal,this->querystart,this->queryend,
+ nmatches,this->queryend - this->querystart + 1));
- for ( ; set /* != NULL */; set = set->rest) {
- debug7(Elt_dump_list(set));
- elt = (Elt_T) set->first;
- debug7(printf("remaining elts %d: ",List_length(set)));
- debug7(printf("%d..%d\n",elt->querystart,elt->queryend));
-
- debug7(printf("Checking for re-compute of left region: elt->queryend %d vs queryend %d\n",elt->queryend,queryend));
- if (/* elt->queryend != queryend && */ elt->queryend > queryend) {
- debug7(printf("Re-computing left region\n"));
- querypos = elt->querystart;
-
- sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
- /*querylength*/(queryend + 1) - querypos,/*queryoffset*/querypos,
- query_compress,sarray,plusp,genestrand,first_read_p,conversion);
- Elt_replace(elt,querypos,nmatches,initptr,finalptr);
- /* set->first = (void *) elt; */
- }
- queryend = elt->querystart - 2;
-
- debug7(printf("remaining elts %d: ",List_length(set)));
- debug7(printf("%d..%d\n",elt->querystart,elt->queryend));
- if (elt->querystart > best_queryend) {
- /* Allow for deletion with higher goal */
- debug7(printf("Allow for deletion with higher goal: %d positions\n",elt->npositions));
- low = subtract_bounded(goal,/*minusterm*/max_insertionlen,chroffset);
- high = add_bounded(goal,/*plusterm*/overall_max_distance,chrhigh);
- Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
- if (elt->npositions <= 0) {
- /* List is empty, so one more miss seen. */
- debug7(printf(" positions empty, so not spanning\n"));
- return false;
-
- } else if (*elt->positions > high) {
- /* Already advanced past goal, so one more miss seen. */
- debug7(printf(" %u advanced past goal %u + %d, so not spanning\n",*elt->positions,goal,overall_max_distance));
- return false;
+ if (this->positions_allocated != NULL) {
+ /* Filled from a previous call */
+ FREE(this->positions_allocated);
+ }
- } else {
- /* Found goal. Advance past goal and continue with loop. */
- debug7(printf(" advancing\n"));
- if ((nmatches = Genome_consecutive_matches_leftward(query_compress,/*left*/*elt->positions,
- /*pos5*/0,/*pos3*/elt->querystart,
- plusp,genestrand,first_read_p)) > 0) {
- debug7(printf(" extending querystart %d leftward by %d matches\n",elt->querystart,nmatches));
- elt->querystart -= nmatches;
- queryend = elt->querystart - 2;
- }
- ++elt->positions;
- --elt->npositions;
- /* continue */
- }
+ if (nmatches == this->queryend - this->querystart + 1) {
+ /* Create a position that works */
+ this->positions_allocated = this->positions = (Univcoord_T *) CALLOC(1,sizeof(Univcoord_T));
+ this->positions[0] = goal;
+ this->npositions_allocated = this->npositions = 1;
} else {
- /* Allow for deletion with lower goal */
- debug7(printf("Allow for deletion with lower goal: %d positions\n",elt->npositions));
- low = subtract_bounded(goal,/*minusterm*/overall_max_distance,chroffset);
- high = add_bounded(goal,/*plusterm*/max_insertionlen,chrhigh);
- Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
- if (elt->npositions <= 0 && elt->positions == elt->positions_allocated) {
- /* List is empty, and no previous one exists */
- debug7(printf(" list is empty and no previous, so not spanning\n"));
- return false;
-
- } else if (elt->npositions <= 0) {
- /* List is empty, but previous one exists */
- if (elt->positions[-1] >= low) {
- /* Found goal with deletion */
- debug7(printf(" possible deletion, continuing\n"));
- if ((nmatches = Genome_consecutive_matches_leftward(query_compress,/*left*/elt->positions[-1],
- /*pos5*/0,/*pos3*/elt->querystart,
- plusp,genestrand,first_read_p)) > 0) {
- debug7(printf(" extending querystart %d leftward by %d matches\n",elt->querystart,nmatches));
- elt->querystart -= nmatches;
- queryend = elt->querystart - 2;
- }
- /* continue */
- } else {
- debug7(printf(" previous %u before goal %u - %d, so not spanning\n",elt->positions[-1],goal,overall_max_distance));
- return false;
- }
-
- } else if (elt->positions == elt->positions_allocated) {
- /* List is at beginning, but current one exists */
- if (*elt->positions > goal) {
- /* Already advanced past goal, so one more miss seen. */
- debug7(printf(" %u advanced past goal_high %u, so not spanning\n",*elt->positions,goal));
- return false;
- } else {
- /* Found goal. Advance past goal and continue with loop. */
- debug7(printf(" advancing\n"));
- if ((nmatches = Genome_consecutive_matches_leftward(query_compress,/*left*/*elt->positions,
- /*pos5*/0,/*pos3*/elt->querystart,
- plusp,genestrand,first_read_p)) > 0) {
- debug7(printf(" extending querystart %d leftward by %d matches\n",elt->querystart,nmatches));
- elt->querystart -= nmatches;
- queryend = elt->querystart - 2;
- }
- ++elt->positions;
- --elt->npositions;
- /* continue */
- }
-
- } else {
- /* Test both current one (for goal) and previous one (for deletion) */
- if (*elt->positions == goal) {
- /* Found goal. Advance past goal and continue with loop. */
- debug7(printf(" advancing\n"));
- if ((nmatches = Genome_consecutive_matches_leftward(query_compress,/*left*/*elt->positions,
- /*pos5*/0,/*pos3*/elt->querystart,
- plusp,genestrand,first_read_p)) > 0) {
- debug7(printf(" extending querystart %d leftward by %d matches\n",elt->querystart,nmatches));
- elt->querystart -= nmatches;
- queryend = elt->querystart - 2;
- }
- ++elt->positions;
- --elt->npositions;
- /* continue */
-
- } else if (elt->positions[-1] >= low) {
- /* Found goal with deletion */
- debug7(printf(" possible deletion, continuing\n"));
- if ((nmatches = Genome_consecutive_matches_leftward(query_compress,/*left*/elt->positions[-1],
- /*pos5*/0,/*pos3*/elt->querystart,
- plusp,genestrand,first_read_p)) > 0) {
- debug7(printf(" extending querystart %d leftward by %d matches\n",elt->querystart,nmatches));
- elt->querystart -= nmatches;
- queryend = elt->querystart - 2;
- }
- /* continue */
+ this->positions_allocated = this->positions = (Univcoord_T *) NULL;
+ this->npositions_allocated = this->npositions = 0;
+ }
+ return; /* Don't even try other methods */
- } else {
- debug7(printf(" %u advanced past goal %u, and previous %u before goal %u - %d, so not spanning\n",
- *elt->positions,goal,elt->positions[-1],goal,overall_max_distance));
- return false;
- }
- }
+#ifndef USE_CSA
+ } else if (multiplep == true) {
+ if (this->status == ELT_VIRGIN) {
+ /* Just go directly to sorting method, and skip SIMD filtering method */
+ this->status = ELT_UNSORTED;
}
+#endif
}
- debug7(printf("Returning true\n"));
- return true;
-}
-
+ if (this->status == ELT_VIRGIN) {
+ fill_positions_filtered_first(this,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
+#if USE_CSA
+ this->status = ELT_UNSORTED;
+#else
+ if (this->finalptr - this->initptr + 1 > EXCESS_SARRAY_HITS) {
+ /* Just keep filtering using SIMD method */
+ this->all_positions = (Univcoord_T *) NULL;
+ this->n_all_positions = 0;
+ } else {
+ this->status = ELT_UNSORTED;
+ }
+#endif
+ } else if (this->status == ELT_UNSORTED) {
+#ifdef USE_CSA
+ if (this->n_all_positions > 0) {
+ qsort(this->all_positions,this->n_all_positions,sizeof(Univcoord_T),Univcoord_compare);
+ }
+#else
+ if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->npositions > EXCESS_SARRAY_HITS*/) {
+ this->all_positions = (Univcoord_T *) NULL;
+ this->n_all_positions = 0;
+ } else {
+ this->all_positions = (Univcoord_T *) MALLOC(this->n_all_positions*sizeof(Univcoord_T));
+ memcpy(this->all_positions,&(sarray->array[this->initptr]),this->n_all_positions*sizeof(Univcoord_T));
+ qsort(this->all_positions,this->n_all_positions,sizeof(Univcoord_T),Univcoord_compare);
+ }
+#endif
+#ifdef DEBUG10
+ for (i = 0; i < this->n_all_positions; i++) {
+ printf("%d: %u\n",i,this->all_positions[i]);
+ }
+ printf("\n");
+#endif
-static int
-donor_match_length_cmp (const void *a, const void *b) {
- Stage3end_T x = * (Stage3end_T *) a;
- Stage3end_T y = * (Stage3end_T *) b;
-
- int x_length = Substring_match_length_orig(Stage3end_substring_donor(x));
- int y_length = Substring_match_length_orig(Stage3end_substring_donor(y));
+ fill_positions_filtered_again(this,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
+ this->status = ELT_SORTED;
- if (x_length < y_length) {
- return -1;
- } else if (y_length < x_length) {
- return +1;
} else {
- return 0;
+ /* ELT_SORTED */
+ fill_positions_filtered_again(this,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
}
+
+#ifdef DEBUG8
+ positions_std = fill_positions_std(&npositions_std,/*low_adj*/low + this->querystart,
+ /*high_adj*/high + this->querystart,
+ this->initptr,this->finalptr,this->querystart,sarray->array);
+ positions_compare(this->positions_allocated,this->npositions_allocated,positions_std,npositions_std);
+ FREE(positions_std);
+#endif
+
+ return;
}
-static int
-acceptor_match_length_cmp (const void *a, const void *b) {
- Stage3end_T x = * (Stage3end_T *) a;
- Stage3end_T y = * (Stage3end_T *) b;
-
- int x_length = Substring_match_length_orig(Stage3end_substring_acceptor(x));
- int y_length = Substring_match_length_orig(Stage3end_substring_acceptor(y));
- if (x_length < y_length) {
- return -1;
- } else if (y_length < x_length) {
+static void
+Elt_dump_list (List_T list) {
+ List_T p;
+ Elt_T elt;
+ int maxn = 0, k;
+
+ for (p = list; p != NULL; p = p->rest) {
+ elt = (Elt_T) p->first;
+ if (elt->npositions > maxn) {
+ maxn = elt->npositions;
+ }
+ }
+
+ for (k = 0; k < maxn /* && k < 100 */; k++) {
+ for (p = list; p != NULL; p = p->rest) {
+ elt = (Elt_T) p->first;
+ if (k >= elt->npositions) {
+ printf("\t");
+ } else {
+ printf("%d..%d:%u\t",elt->querystart,elt->queryend,elt->positions[k]);
+ }
+ }
+ printf("\n");
+ }
+ printf("\n");
+
+ return;
+}
+
+static void
+Elt_dump (Elt_T elt) {
+ int k;
+
+ printf("Elt %d..%d (SA %u+%d) with %d positions:\n",
+ elt->querystart,elt->queryend,elt->initptr,elt->finalptr - elt->initptr,elt->npositions);
+ for (k = 0; k < elt->npositions; k++) {
+ printf(" %u\n",elt->positions[k]);
+ }
+ printf("\n");
+
+ return;
+}
+
+
+
+#define add_bounded(x,plusterm,highbound) ((x + (plusterm) >= highbound) ? (highbound - 1) : x + (plusterm))
+#define subtract_bounded(x,minusterm,lowbound) ((x < lowbound + (minusterm)) ? lowbound : x - (minusterm))
+
+
+/* Copied to stage1hr.c */
+static int
+donor_match_length_cmp (const void *a, const void *b) {
+ Stage3end_T x = * (Stage3end_T *) a;
+ Stage3end_T y = * (Stage3end_T *) b;
+
+ int x_length = Substring_match_length_orig(Stage3end_substring_donor(x));
+ int y_length = Substring_match_length_orig(Stage3end_substring_donor(y));
+
+ if (x_length < y_length) {
+ return -1;
+ } else if (y_length < x_length) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+/* Copied to stage1hr.c */
+static int
+acceptor_match_length_cmp (const void *a, const void *b) {
+ Stage3end_T x = * (Stage3end_T *) a;
+ Stage3end_T y = * (Stage3end_T *) b;
+
+ int x_length = Substring_match_length_orig(Stage3end_substring_acceptor(x));
+ int y_length = Substring_match_length_orig(Stage3end_substring_acceptor(y));
+
+ if (x_length < y_length) {
+ return -1;
+ } else if (y_length < x_length) {
return +1;
} else {
return 0;
@@ -2742,13 +3538,20 @@ acceptor_match_length_cmp (const void *a, const void *b) {
}
-static void
-collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous, List_T *singlesplicing,
- List_T *doublesplicing, int querystart_same, int queryend_same,
- Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
- Chrpos_T chrlength, Univcoord_T goal,
- List_T rightward_set, List_T leftward_set, int querylength, Compress_T query_compress,
- bool plusp, int genestrand, bool first_read_p, int nmisses_allowed) {
+/* Also defined in stage1hr.c */
+#define add_bounded(x,plusterm,highbound) ((x + (plusterm) >= highbound) ? (highbound - 1) : x + (plusterm))
+#define subtract_bounded(x,minusterm,lowbound) ((x < lowbound + (minusterm)) ? lowbound : x - (minusterm))
+
+#if 0
+/* Previously called collect_elt_matches */
+static bool
+solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous, List_T *singlesplicing,
+ int querystart_same, int queryend_same,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ Chrpos_T chrlength, Univcoord_T goal, List_T rightward_set, List_T leftward_set,
+ int querylength, Compress_T query_compress,
+ bool plusp, int genestrand, bool first_read_p, int nmisses_allowed) {
+ bool twopartp = false;
List_T set, p;
Stage3end_T hit, *hitarray;
Elt_T elt;
@@ -2759,7 +3562,6 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
int querystart_diff, queryend_diff, indel_pos;
#if 0
int nmismatches1, nmismatches2;
- int query_indel_pos;
#endif
List_T accepted_hits, rejected_hits;
@@ -2771,7 +3573,6 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
double best_prob, prob;
Substring_T donor, acceptor;
- int sensedir;
Uintlist_T ambcoords;
Intlist_T amb_knowni, amb_nmismatches;
Doublelist_T amb_probs;
@@ -2803,10 +3604,10 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
debug7(printf(" successful candidate found\n"));
if (goal < (Univcoord_T) querylength) {
debug7(printf(" Goes over beginning of chromosome\n"));
- return;
+ return false;
} else if (goal + querylength > chrhigh) {
debug7(printf(" Goes over end of chromosome\n"));
- return;
+ return false;
} else {
left = goal /* - querylength */;
}
@@ -2817,7 +3618,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
for (set = rightward_set; set /* != NULL */; set = set->rest) {
elt = (Elt_T) set->first;
debug7(printf("%d..%d:%u vs %u: ",elt->querystart,elt->queryend,elt->positions[-1],goal));
- assert(elt->filledp == true);
+ /* assert(elt->status != ELT_VIRGIN); */
if (elt->positions[-1] == goal) {
debug7(printf("same\n"));
if (elt->querystart < querystart_same) {
@@ -2829,6 +3630,12 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
nsame++;
} else {
+#if 0
+ /* Assertion holds because of values for low and high given to Elt_fill_positions_filtered */
+ assert(elt->positions[-1] + max_insertionlen + overall_max_distance > goal &&
+ elt->positions[-1] < goal + max_insertionlen + overall_max_distance);
+#endif
+
debug7(printf("diff (npositions %d)\n",elt->npositions));
debug7(printf("Pushing position %u\n",elt->positions[-1]));
difflist = Uintlist_push(difflist,elt->positions[-1]);
@@ -2849,7 +3656,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
for (set = leftward_set; set /* != NULL */; set = set->rest) {
elt = (Elt_T) set->first;
debug7(printf("%d..%d:%u vs %u: ",elt->querystart,elt->queryend,elt->positions[-1],goal));
- assert(elt->filledp == true);
+ /* assert(elt->status != ELT_VIRGIN); */
if (elt->positions[-1] == goal) {
debug7(printf("same\n"));
if (elt->querystart < querystart_same) {
@@ -2861,6 +3668,12 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
nsame++;
} else {
+#if 0
+ /* Assertion holds because of values for low and high given to Elt_fill_positions_filtered */
+ assert(elt->positions[-1] + max_insertionlen + overall_max_distance > goal &&
+ elt->positions[-1] < goal + max_insertionlen + overall_max_distance);
+#endif
+
debug7(printf("diff (npositions %d)\n",elt->npositions));
debug7(printf("Pushing position %u\n",elt->positions[-1]));
difflist = Uintlist_push(difflist,elt->positions[-1]);
@@ -2892,13 +3705,14 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
} else {
debug7(printf("Result: successful hit saved\n"));
- debug(printf("1. Reporting hit with %d mismatches vs %d allowed\n",nmismatches,nmisses_allowed));
if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
left,/*genomiclength*/querylength,
query_compress,plusp,genestrand,first_read_p,
chrnum,chroffset,chrhigh,chrlength,
/*sarrayp*/true)) != NULL) {
+ debug1(printf("1. Reporting hit with %d mismatches vs %d allowed\n",nmismatches,nmisses_allowed));
*subs = List_push(*subs,(void *) hit);
+ twopartp = true;
}
}
assert(difflist == NULL);
@@ -2946,8 +3760,8 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
j++;
}
}
- segmenti_donor_knownpos[segmenti_donor_nknown] = querylength;
- segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength;
+ segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
if (nsplicesites > 0 &&
@@ -2966,10 +3780,15 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
j++;
}
}
- segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength;
- segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength;
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
/* nspliceends = 0; */
+ assert(segmenti_donor_knownpos[0] == querylength);
+ assert(segmentj_acceptor_knownpos[0] == querylength);
+ assert(segmentj_antidonor_knownpos[0] == querylength);
+ assert(segmenti_antiacceptor_knownpos[0] == querylength);
+
spliceends_sense =
Splice_solve_single_sense(&(*found_score),&nspliceends_sense,spliceends_sense,&lowprob,
&segmenti_usedp,&segmentj_usedp,
@@ -2986,6 +3805,12 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
splicing_penalty,/*max_mismatches_allowed*/1000,
plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
/*sarrayp*/true);
+
+ assert(segmenti_donor_knownpos[0] == querylength);
+ assert(segmentj_acceptor_knownpos[0] == querylength);
+ assert(segmentj_antidonor_knownpos[0] == querylength);
+ assert(segmenti_antiacceptor_knownpos[0] == querylength);
+
spliceends_antisense =
Splice_solve_single_antisense(&(*found_score),&nspliceends_antisense,spliceends_antisense,&lowprob,
&segmenti_usedp,&segmentj_usedp,
@@ -3029,11 +3854,13 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
/*indel_penalty*/2,/*sarrayp*/true)) != NULL) {
debug7(printf("successful"));
*indels = List_push(*indels,(void *) hit);
+ twopartp = true;
}
#else
*indels = Indel_solve_middle_deletion(&foundp,&(*found_score),&nhits,*indels,
/*left*/left1,chrnum,chroffset,chrhigh,chrlength,
- /*indels*/-nindels,query_compress,querylength,nmisses_allowed,
+ /*indels*/-nindels,query_compress,querylength,
+ nmisses_allowed,
plusp,genestrand,first_read_p,/*sarray*/true);
debug7(
if (foundp == true) {
@@ -3068,6 +3895,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
/*indel_penalty*/2,/*sarrayp*/true)) != NULL) {
debug7(printf("successful"));
*indels = List_push(*indels,(void *) hit);
+ twopartp = true;
}
#else
*indels = Indel_solve_middle_insertion(&foundp,&(*found_score),&nhits,*indels,
@@ -3230,6 +4058,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
/*amb_probs_donor*/NULL,amb_probs,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
Stage3end_sensedir(hit),/*sarrayp*/true));
+ twopartp = true;
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -3293,6 +4122,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
amb_probs,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
Stage3end_sensedir(hit),/*sarrayp*/true));
+ twopartp = true;
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -3463,6 +4293,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
/*amb_probs_donor*/NULL,amb_probs,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
Stage3end_sensedir(hit),/*sarrayp*/true));
+ twopartp = true;
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -3526,6 +4357,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
amb_probs,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
Stage3end_sensedir(hit),/*sarrayp*/true));
+ twopartp = true;
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -3600,8 +4432,8 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
j++;
}
}
- segmenti_donor_knownpos[segmenti_donor_nknown] = querylength;
- segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength;
+ segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
if (nsplicesites > 0 &&
@@ -3620,8 +4452,8 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
j++;
}
}
- segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength;
- segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength;
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
/* nspliceends = 0; */
spliceends_sense =
@@ -3683,11 +4515,13 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
/*indel_penalty*/2,/*sarrayp*/true)) != NULL) {
debug7(printf("successful"));
*indels = List_push(*indels,(void *) hit);
+ twopartp = true;
}
#else
*indels = Indel_solve_middle_deletion(&foundp,&(*found_score),&nhits,*indels,
/*left*/left1,chrnum,chroffset,chrhigh,chrlength,
- /*indels*/-nindels,query_compress,querylength,nmisses_allowed,
+ /*indels*/-nindels,query_compress,querylength,
+ nmisses_allowed,
plusp,genestrand,first_read_p,/*sarray*/true);
debug7(
if (foundp == true) {
@@ -3722,6 +4556,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
/*indel_penalty*/2,/*sarrayp*/true)) != NULL) {
debug7(printf("successful"));
*indels = List_push(*indels,(void *) hit);
+ twopartp = true;
}
#else
*indels = Indel_solve_middle_insertion(&foundp,&(*found_score),&nhits,*indels,
@@ -3886,6 +4721,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
/*amb_probs_donor*/NULL,amb_probs,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
Stage3end_sensedir(hit),/*sarrayp*/true));
+ twopartp = true;
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -3950,6 +4786,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
amb_probs,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
Stage3end_sensedir(hit),/*sarrayp*/true));
+ twopartp = true;
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -4118,6 +4955,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
/*amb_probs_donor*/NULL,amb_probs,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
Stage3end_sensedir(hit),/*sarrayp*/true));
+ twopartp = true;
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -4181,6 +5019,7 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
amb_probs,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
Stage3end_sensedir(hit),/*sarrayp*/true));
+ twopartp = true;
Doublelist_free(&amb_probs);
Intlist_free(&amb_nmismatches);
Intlist_free(&amb_knowni);
@@ -4217,673 +5056,3326 @@ collect_elt_matches (int *found_score, List_T *subs, List_T *indels, List_T *amb
Uintlist_free(&difflist);
}
- return;
+ return twopartp;
}
+#endif
-void
-Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous, List_T *singlesplicing,
- List_T *doublesplicing, char *queryuc_ptr, char *queryrc, int querylength,
- Compress_T query_compress_fwd, Compress_T query_compress_rev,
- int nmisses_allowed, int genestrand, bool first_read_p) {
- List_T plus_set, minus_set, p;
- List_T rightward_set, leftward_set;
- Elt_T best_plus_elt, best_minus_elt, elt, *array;
- UINT4 best_plus_nmatches, best_minus_nmatches, nmatches;
+static int
+get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T *best_left_diagonals,
+ List_T *all_right_diagonals, List_T *all_left_diagonals,
+ T sarray, char *queryptr, int querylength, Compress_T query_compress,
+ Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength,
+ Univcoord_T goal, Elt_T *original_elt_array, int best_i, int nelts,
+ bool plusp, int genestrand, bool first_read_p, char conversion[],
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool) {
+ int best_score_right, best_score_left, best_score, score;
+ Elt_T elt, right_elt;
+ List_T *elt_tree;
+ Univcoord_T low, high;
+ Chrpos_T low_chrpos, high_chrpos;
+ int max_leftward, min_leftward, skip_left;
+ int querystart, queryend;
+
Sarrayptr_T initptr, finalptr;
- int plus_niter, minus_niter;
bool successp;
- int plus_querypos, minus_querypos, halfwaypos;
- int nelts, i;
- Chrnum_T chrnum;
- Univcoord_T chroffset, chrhigh, left;
- Chrpos_T chrlength;
- Stage3end_T hit;
- int nmismatches;
- T plus_sarray, minus_sarray;
- char *plus_conversion, *minus_conversion;
+ UINT4 nmatches;
+ int i, j;
+ List_T p;
- if (nmisses_allowed < 0) {
- nmisses_allowed = 0;
- }
- debug(printf("\nStarting Sarray_search_greedy with querylength %d and indexsize %d and nmisses_allowed %d\n",
- querylength,sarray_fwd->indexsize,nmisses_allowed));
- debug(printf("genestrand = %d\n",genestrand));
+ Univdiag_T *diagonal_array, diagonal, prev_diagonal;
+ List_T sub_diagonals;
+ Diag_T sub_diagonal;
+ int querypos;
+ int ndiagonals;
+ int nfound;
+
+ bool *coveredp;
+ Univcoord_T mappingstart, mappingend;
+ Chrpos_T **mappings, chrstart, chrend;
+ int *npositions, totalpositions = 0;
+ int maxnconsecutive = 0;
+ Oligoindex_T oligoindex;
+ bool oned_matrix_p;
+ int indexsize;
- *found_score = querylength;
- if (genestrand == +2) {
- plus_conversion = conversion_rev;
- minus_conversion = conversion_fwd;
- plus_sarray = sarray_rev;
- minus_sarray = sarray_fwd;
- } else {
- plus_conversion = conversion_fwd;
- minus_conversion = conversion_rev;
- plus_sarray = sarray_fwd;
- minus_sarray = sarray_rev;
- }
- /* Do one plus round */
- plus_querypos = 0;
- sarray_search(&initptr,&finalptr,&successp,&best_plus_nmatches,&(queryuc_ptr[plus_querypos]),
- querylength - plus_querypos,/*queryoffset*/plus_querypos,
- query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,first_read_p,plus_conversion);
- best_plus_elt = Elt_new(plus_querypos,best_plus_nmatches,initptr,finalptr);
- plus_querypos += (int) best_plus_nmatches;
- plus_querypos += 1; /* To skip the presumed mismatch */
+ debug13(printf("\n***Entered get_diagonals, plusp %d, with goal %u\n",plusp,goal));
+ /* Make elt tree, which allows for subdivisions of an elt */
+ elt_tree = (List_T *) MALLOC(nelts*sizeof(List_T));
+ for (i = 0; i < nelts; i++) {
+ elt_tree[i] = List_push(NULL,(void *) original_elt_array[i]);
+ }
- /* Do one minus round */
- minus_querypos = 0;
- sarray_search(&initptr,&finalptr,&successp,&best_minus_nmatches,&(queryrc[minus_querypos]),
- querylength - minus_querypos,/*queryoffset*/minus_querypos,
- query_compress_rev,minus_sarray,/*plusp*/false,genestrand,first_read_p,minus_conversion);
- best_minus_elt = Elt_new(minus_querypos,best_minus_nmatches,initptr,finalptr);
- minus_querypos += (int) best_minus_nmatches;
- minus_querypos += 1; /* To skip the presumed mismatch */
-
-
- if (best_plus_nmatches >= querylength/2) {
- /* See if we have a winner */
- debug(printf("best_plus_nmatches = %d > %d/2, so checking mismatches against %d allowed\n",
- best_plus_nmatches,querylength,nmisses_allowed));
- Elt_fill_positions_all(best_plus_elt,plus_sarray);
- for (i = 0; i < best_plus_elt->npositions; i++) {
- left = best_plus_elt->positions[i];
- /* Should return max_mismatches + 1 if it exceeds the limit */
- if ((nmismatches = Genome_count_mismatches_limit(query_compress_fwd,left,/*pos5*/0,/*pos3*/querylength,
- /*max_mismatches*/nmisses_allowed,
- /*plusp*/true,genestrand,first_read_p)) <= nmisses_allowed) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- debug(printf("Case 1: New substitution from beginning\n"));
- if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
- left,/*genomiclength*/querylength,
- query_compress_fwd,/*plusp*/true,genestrand,first_read_p,
- chrnum,chroffset,chrhigh,chrlength,
- /*sarrayp*/true)) != NULL) {
- *subs = List_push(*subs,(void *) hit);
- }
- }
- debug(printf("Looking at plus position %u => %d mismatches\n",left,nmismatches));
- }
-#if 0
- } else if (terminal_threshold >= *found_score) {
- debug(printf("terminal_threshold %d exceeds found_score %d, so not checking middle of read\n",terminal_threshold,*found_score));
-#endif
+ /* Compute leftward extensions for right side */
+ debug13(printf("Performing leftward extensions for right side\n"));
+ low = subtract_bounded(goal,/*minusterm*/max_insertionlen,chroffset);
+ high = add_bounded(goal,/*plusterm*/overall_max_distance,chrhigh);
+ for (i = best_i + 1; i < nelts; i++) {
+ elt = (Elt_T) elt_tree[i]->first;
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p,
+ /*multiplep*/false);
+ if (elt->npositions > 0) {
+ /* Success: Update low and high for next search */
+ low = subtract_bounded(elt->positions[0],/*minusterm*/max_insertionlen,chroffset);
+ high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/overall_max_distance,chrhigh);
+ } else {
+ debug13(printf("Elt %d..%d (leftward %d..%d) has no positions, so trying to reduce elt->queryend\n",
+ elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend));
+ if (i + 1 < nelts) {
+ /* A. Try moving boundary to the left */
+ right_elt = (Elt_T) elt_tree[i+1]->first;
+ Elt_fill_positions_filtered(right_elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p,
+ /*multiplep*/false);
+ if ((max_leftward = Elt_extend_leftward(&min_leftward,right_elt,query_compress,
+ plusp,genestrand,first_read_p,/*skip_left*/0)) > 0) {
+ debug13(printf("Can extend %d..%d leftward by max %d, min %d\n",
+ right_elt->querystart,right_elt->queryend,max_leftward,min_leftward));
+ right_elt->querystart_leftward -= min_leftward; /* Using min_leftward is conservative */
+ queryend = right_elt->querystart_leftward - 2;
+
+ j = i;
+ while (j >= best_i && ((Elt_T) elt_tree[j]->first)->querystart_leftward >= queryend) {
+ debug13(printf("Left-extension of elt %d..%d => %d..%d obliterates elt %d..%d => %d..%d\n",
+ right_elt->querystart,right_elt->queryend,right_elt->querystart_leftward,right_elt->queryend_leftward,
+ ((Elt_T) elt_tree[j]->first)->querystart,((Elt_T) elt_tree[j]->first)->queryend,((Elt_T) elt_tree[j]->first)->querystart_leftward,queryend));
+ --j;
+ }
- } else {
- /* Try starting from middle of read */
- halfwaypos = querylength/2;
- debug(printf("Starting from halfway point on plus\n"));
- sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryuc_ptr[halfwaypos]),
- querylength - halfwaypos,/*queryoffset*/halfwaypos,
- query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,first_read_p,plus_conversion);
- /* Don't want to limit based on nmatches */
- if (1 || nmatches >= querylength - halfwaypos) {
- elt = Elt_new(halfwaypos,nmatches,initptr,finalptr);
- Elt_fill_positions_all(elt,plus_sarray);
- for (i = 0; i < elt->npositions; i++) {
- left = elt->positions[i];
- /* Should return max_mismatches + 1 if it exceeds the limit */
- if ((nmismatches = Genome_count_mismatches_limit(query_compress_fwd,left,/*pos5*/0,/*pos3*/querylength,
- /*max_mismatches*/nmisses_allowed,
- /*plusp*/true,genestrand,first_read_p)) <= nmisses_allowed) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- debug(printf("Case 1: New substitution from middle\n"));
- if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
- left,/*genomiclength*/querylength,
- query_compress_fwd,/*plusp*/true,genestrand,first_read_p,
- chrnum,chroffset,chrhigh,chrlength,
- /*sarrayp*/true)) != NULL) {
- *subs = List_push(*subs,(void *) hit);
+ if (j >= best_i) {
+ /* Create a new elt with new positions */
+ querystart = ((Elt_T) elt_tree[j]->first)->querystart_leftward;
+ /* queryend was computed above */
+ sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
+ /*querylength*/(queryend + 1) - querystart,/*queryoffset*/querystart,
+ query_compress,sarray,plusp,genestrand,first_read_p,conversion);
+ elt_tree[j] = List_pop(elt_tree[j],(void **) &elt);
+ if (elt->temporaryp == true) {
+ Elt_free(&elt);
+ }
+ elt = Elt_new(querystart,nmatches,initptr,finalptr,/*temporaryp*/true);
+ elt_tree[j] = List_push(NULL,(void *) elt);
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p,
+ /*multiplep*/false);
}
}
- debug(printf("Looking at plus position %u => %d mismatches\n",left,nmismatches));
}
- Elt_free(&elt);
+
+ if (elt->npositions > 0) {
+ /* Success: Update low and high for next search */
+ low = subtract_bounded(elt->positions[0],/*minusterm*/max_insertionlen,chroffset);
+ high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/overall_max_distance,chrhigh);
+ }
}
}
- if (best_minus_nmatches >= querylength/2) {
- /* See if we have a winner */
- debug(printf("best_minus_nmatches = %d > %d/2, so checking mismatches against %d allowed\n",
- best_minus_nmatches,querylength,nmisses_allowed));
- Elt_fill_positions_all(best_minus_elt,minus_sarray);
- for (i = 0; i < best_minus_elt->npositions; i++) {
- left = best_minus_elt->positions[i];
- /* Should return max_mismatches + 1 if it exceeds the limit */
- if ((nmismatches = Genome_count_mismatches_limit(query_compress_rev,left,/*pos5*/0,/*pos3*/querylength,
- /*max_mismatches*/nmisses_allowed,
- /*plusp*/false,genestrand,first_read_p)) <= nmisses_allowed) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- debug(printf("Case 2: New substitution from beginning\n"));
- if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
- left,/*genomiclength*/querylength,
- query_compress_rev,/*plusp*/false,genestrand,first_read_p,
- chrnum,chroffset,chrhigh,chrlength,
- /*sarrayp*/true)) != NULL) {
- *subs = List_push(*subs,(void *) hit);
+ /* Compute leftward extensions for left side */
+ debug13(printf("Performing leftward extensions for left side\n"));
+ low = subtract_bounded(goal,/*minusterm*/overall_max_distance,chroffset);
+ high = add_bounded(goal,/*plusterm*/max_insertionlen,chrhigh);
+ for (i = best_i - 1; i >= 0; --i) {
+ elt = (Elt_T) elt_tree[i]->first;
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p,
+ /*multiplep*/false);
+ if (elt->npositions > 0) {
+ /* Success: Update low and high for next search */
+ low = subtract_bounded(elt->positions[0],/*minusterm*/overall_max_distance,chroffset);
+ high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/max_insertionlen,chrhigh);
+ } else {
+ /* A. Try moving boundary to the left */
+ debug13(printf("Elt %d..%d has no positions, so trying to reduce elt->queryend\n",
+ elt->querystart,elt->queryend));
+ if (i + 1 < nelts) {
+ right_elt = (Elt_T) elt_tree[i+1]->first;
+ skip_left = 0;
+ if ((max_leftward = Elt_extend_leftward(&min_leftward,right_elt,query_compress,
+ plusp,genestrand,first_read_p,/*skip_left*/0)) == 0) {
+ skip_left = 1;
+ max_leftward = Elt_extend_leftward(&min_leftward,right_elt,query_compress,
+ plusp,genestrand,first_read_p,skip_left);
+ debug13(printf("On second try, min_leftward is %d, max_leftward is %d\n",min_leftward,max_leftward));
}
- }
- debug(printf("Looking at minus position %u => %d mismatches\n",left,nmismatches));
- }
-
-#if 0
- } else if (terminal_threshold >= *found_score) {
- debug(printf("terminal_threshold %d exceeds found_score %d, so not checking middle of read\n",terminal_threshold,*found_score));
-#endif
- } else {
- /* Try starting from middle of read */
- halfwaypos = querylength/2;
- debug(printf("Starting from halfway point on minus\n"));
- sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryrc[halfwaypos]),
- querylength - halfwaypos,/*queryoffset*/halfwaypos,
- query_compress_rev,minus_sarray,/*plusp*/false,genestrand,first_read_p,minus_conversion);
- /* Don't want to limit based on nmatches */
- if (1 || nmatches >= querylength - halfwaypos) {
- elt = Elt_new(halfwaypos,nmatches,initptr,finalptr);
- Elt_fill_positions_all(elt,minus_sarray);
- for (i = 0; i < elt->npositions; i++) {
- left = elt->positions[i];
- /* Should return max_mismatches + 1 if it exceeds the limit */
- if ((nmismatches = Genome_count_mismatches_limit(query_compress_rev,left,/*pos5*/0,/*pos3*/querylength,
- /*max_mismatches*/nmisses_allowed,
- /*plusp*/false,genestrand,first_read_p)) <= nmisses_allowed) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- debug(printf("Case 2: New substitution from middle\n"));
- if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
- left,/*genomiclength*/querylength,
- query_compress_rev,/*plusp*/false,genestrand,first_read_p,
- chrnum,chroffset,chrhigh,chrlength,
- /*sarrayp*/true)) != NULL) {
- *subs = List_push(*subs,(void *) hit);
+ if (max_leftward > 0) {
+ debug13(printf("Can extend %d..%d leftward by max %d, min %d\n",
+ right_elt->querystart,right_elt->queryend,max_leftward,min_leftward));
+ right_elt->querystart_leftward -= min_leftward + skip_left; /* Using min_leftward is conservative */
+ queryend = right_elt->querystart_leftward - 2;
+
+ j = i;
+ while (j >= best_i && ((Elt_T) elt_tree[j]->first)->querystart_leftward >= queryend) {
+ debug13(printf("Left-extension of elt %d..%d => %d..%d obliterates elt %d..%d => %d..%d\n",
+ right_elt->querystart,right_elt->queryend,right_elt->querystart_leftward,right_elt->querystart_leftward,
+ ((Elt_T) elt_tree[j]->first)->querystart,((Elt_T) elt_tree[j]->first)->queryend,((Elt_T) elt_tree[j]->first)->querystart_leftward,queryend));
+ --j;
+ }
+
+ if (j >= 0) {
+ /* Create a new elt with new positions */
+ querystart = ((Elt_T) elt_tree[j]->first)->querystart_leftward;
+ /* queryend was computed above */
+ sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
+ /*querylength*/(queryend + 1) - querystart,/*queryoffset*/querystart,
+ query_compress,sarray,plusp,genestrand,first_read_p,conversion);
+ elt_tree[j] = List_pop(elt_tree[j],(void **) &elt);
+ if (elt->temporaryp == true) {
+ Elt_free(&elt);
+ }
+ elt = Elt_new(querystart,nmatches,initptr,finalptr,/*temporaryp*/true);
+ elt_tree[j] = List_push(NULL,(void *) elt);
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p,
+ /*multiplep*/false);
}
}
- debug(printf("Looking at minus position %u => %d mismatches\n",left,nmismatches));
}
- Elt_free(&elt);
+
+ if (elt->npositions > 0) {
+ /* Success: Update low and high for next search */
+ low = subtract_bounded(elt->positions[0],/*minusterm*/overall_max_distance,chroffset);
+ high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/max_insertionlen,chrhigh);
+ }
}
}
-
- debug(printf("Found %d subs\n",List_length(*subs)));
-#if 0
- /* Allow identification of splicing, even if substitutions are found */
- if (*subs != NULL) {
- /* Be satisfied with 1-mismatch results */
- Elt_free(&best_plus_elt);
- Elt_free(&best_minus_elt);
- return;
- }
-#endif
+#ifdef SUBDIVIDE_NOMATCHES
+ /* Try to subdivide elts that have no matches */
+ coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
+ mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
+ npositions = (int *) CALLOCA(querylength,sizeof(int));
+ oligoindex = Oligoindex_array_elt(oligoindices_minor,/*source*/0);
+ indexsize = Oligoindex_indexsize(oligoindex);
+
+
+ debug13(printf("Starting subdivisions on right side\n"));
+ low = subtract_bounded(goal,/*minusterm*/max_insertionlen,chroffset);
+ high = add_bounded(goal,/*plusterm*/overall_max_distance,chrhigh);
+ i = best_i + 1;
+ while (i < nelts) {
+ elt = (Elt_T) elt_tree[i]->first;
+ debug13(printf("Elt #%d at %d..%d has %d matching positions\n",i,elt->querystart,elt->queryend,elt->npositions));
+
+ if (elt->npositions > 0) {
+ low = subtract_bounded(elt->positions[0],/*minusterm*/max_insertionlen,chroffset);
+ high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/overall_max_distance,chrhigh);
+ i++;
+ } else {
+ j = i;
+ querystart = elt->querystart_leftward;
+ while (j + 1 < nelts && ((Elt_T) elt_tree[j+1]->first)->npositions <= 0) {
+ j = j + 1;
+ }
+ elt = (Elt_T) elt_tree[j]->first;
+ queryend = elt->queryend_leftward;
+ debug13(printf("Elts from %d through %d have no matching positions\n",i,j));
- if (plus_querypos >= querylength) {
- plus_set = (List_T) NULL;
- } else {
- /* Extend plus side a second time */
- sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryuc_ptr[plus_querypos]),
- querylength - plus_querypos,/*queryoffset*/plus_querypos,
- query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,first_read_p,plus_conversion);
- elt = Elt_new(plus_querypos,nmatches,initptr,finalptr);
- plus_querypos += nmatches;
- plus_querypos += 1; /* To skip the presumed mismatch */
+#if 0
+ nfound = 0;
+ /* B. Try subdividing elt using 16-mers every 8 */
+ debug13(printf("B. Try to subdivide elt region at %d..%d\n",querystart,queryend));
+ for (querypos = queryend - 16; querypos >= querystart; querypos -= 8) {
+ sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+ /*querylength*/16,/*queryoffset*/querypos,
+ query_compress,sarray,plusp,genestrand,first_read_p,conversion);
+ elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
+ elt_tree[i] = List_push(elt_tree[i],(void *) elt);
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
+ nfound += elt->npositions;
+ debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
+ }
- debug(printf("plus_querypos %d vs querylength %d\n",plus_querypos,querylength));
- if (nmatches <= best_plus_nmatches) {
- /* Initial (left) elt was best */
- debug(printf("Initial elt %p was best:\n",best_plus_elt));
- plus_set = List_push(NULL,elt);
- if (plus_querypos >= querylength) {
- chrhigh = 0U;
- Elt_fill_positions_all(best_plus_elt,plus_sarray);
- debug(Elt_dump(best_plus_elt));
- for (i = 0; i < best_plus_elt->npositions; i++) {
- left = best_plus_elt->positions[i];
- if (left > chrhigh) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* *chrhigh += 1U; */
- }
- if (extend_rightward(/*goal*/left,chroffset,chrhigh,/*rightward_set*/plus_set,
- query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,first_read_p,
- best_plus_elt->queryend) == true) {
- collect_elt_matches(&(*found_score),&(*subs),&(*indels),&(*ambiguous),&(*singlesplicing),&(*doublesplicing),
- best_plus_elt->querystart,best_plus_elt->queryend,
- chrnum,chroffset,chrhigh,chrlength,
- /*goal*/left,/*rightward_set*/plus_set,/*leftward_set*/NULL,
- querylength,query_compress_fwd,/*plusp*/true,genestrand,first_read_p,
- nmisses_allowed);
- }
+ if (nfound == 0) {
+ /* C. Try subdividing elt using 16-mers every 1 */
+ debug13(printf("C. Try to subdivide elt region at %d..%d\n",querystart,queryend));
+ for (querypos = queryend - 16; querypos >= querystart; querypos -= 1) {
+ sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+ /*querylength*/16,/*queryoffset*/querypos,
+ query_compress,sarray,plusp,genestrand,first_read_p,conversion);
+ elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
+ elt_tree[i] = List_push(elt_tree[i],(void *) elt);
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
+ nfound += elt->npositions;
+ debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
}
}
- } else {
- /* Second (right) plus elt is best */
- debug(printf("Second plus elt %p is best:\n",elt));
- plus_set = List_push(NULL,best_plus_elt);
- best_plus_elt = elt;
- best_plus_nmatches = nmatches;
- if (plus_querypos >= querylength) {
- chrhigh = 0U;
- Elt_fill_positions_all(best_plus_elt,plus_sarray);
- debug(Elt_dump(best_plus_elt));
- for (i = 0; i < best_plus_elt->npositions; i++) {
- left = best_plus_elt->positions[i];
- if (left > chrhigh) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* *chrhigh += 1U; */
- }
- nmatches = Genome_consecutive_matches_leftward(query_compress_fwd,left,
- /*pos5*/0,/*pos3*/best_plus_elt->querystart,
- /*plusp*/true,genestrand,first_read_p);
- debug(printf("Looking at position %u => %d matches leftward\n",left,nmatches));
- best_plus_elt->querystart -= nmatches;
- if (extend_leftward(/*goal*/left,chroffset,chrhigh,/*leftward_set*/plus_set,
- /*queryptr*/queryuc_ptr,query_compress_fwd,
- plus_sarray,/*plusp*/true,genestrand,first_read_p,plus_conversion,
- best_plus_elt->querystart,best_plus_elt->queryend) == true) {
- collect_elt_matches(&(*found_score),&(*subs),&(*indels),&(*ambiguous),&(*singlesplicing),&(*doublesplicing),
- best_plus_elt->querystart,best_plus_elt->queryend,
- chrnum,chroffset,chrhigh,chrlength,
- /*goal*/left,/*rightward_set*/NULL,/*leftward_set*/plus_set,
- querylength,query_compress_fwd,/*plusp*/true,genestrand,first_read_p,
- nmisses_allowed);
- }
- best_plus_elt->querystart += nmatches;
+
+ if (nfound == 0) {
+ /* D. Try subdividing elt using 8-mers every 1 */
+ debug13(printf("D. Try to subdivide elt region at %d..%d\n",querystart,queryend));
+ for (querypos = queryend - 8; querypos >= querystart; querypos -= 1) {
+ sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+ /*querylength*/8,/*queryoffset*/querypos,
+ query_compress,sarray,plusp,genestrand,first_read_p,conversion);
+ elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
+ elt_tree[i] = List_push(elt_tree[i],(void *) elt);
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
+ nfound += elt->npositions;
+ debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
}
}
+
+#else
+
+ mappingstart = low + querystart;
+ mappingend = high + queryend;
+ chrstart = mappingstart - chroffset;
+ chrend = mappingend - chroffset;
+
+ Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,/*plusp:true*/true,
+ queryptr,querystart,queryend,/*chrpos*/chrstart,genestrand);
+ sub_diagonals = Oligoindex_get_mappings(NULL,coveredp,mappings,npositions,&totalpositions,
+ &oned_matrix_p,&maxnconsecutive,oligoindices_minor,oligoindex,
+ queryptr,querystart,queryend,querylength,
+ chrstart,chrend,chroffset,chrhigh,/*plusp:true*/true,diagpool);
+ Oligoindex_untally(oligoindex,queryptr,querylength);
+
+ debug14(printf("Got %d sub diagonals\n",List_length(sub_diagonals)));
+ for (p = sub_diagonals; p != NULL; p = List_next(p)) {
+ sub_diagonal = (Diag_T) List_head(p);
+ debug14(printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend + indexsize - 1,chrstart + sub_diagonal->diagonal));
+ elt = Elt_new_fillin(sub_diagonal->querystart,sub_diagonal->queryend,indexsize,chroffset + chrstart + sub_diagonal->diagonal);
+ elt_tree[i] = List_push(elt_tree[i],(void *) elt);
+ }
+
+#endif
+
+ i = j + 1;
}
}
-
- if (minus_querypos >= querylength) {
- minus_set = (List_T) NULL;
- } else {
- /* Extend minus side a second time */
- sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryrc[minus_querypos]),
- querylength - minus_querypos,/*queryoffset*/minus_querypos,
- query_compress_rev,minus_sarray,/*plusp*/false,genestrand,first_read_p,minus_conversion);
- elt = Elt_new(minus_querypos,nmatches,initptr,finalptr);
- minus_querypos += nmatches;
- minus_querypos += 1; /* To skip the presumed mismatch */
- debug(printf("minus_querypos %d vs querylength %d\n",minus_querypos,querylength));
- if (nmatches <= best_minus_nmatches) {
- /* Initial (left) elt was best */
- debug(printf("Initial elt %p was best:\n",best_minus_elt));
- minus_set = List_push(NULL,elt);
- if (minus_querypos >= querylength) {
- chrhigh = 0U;
- Elt_fill_positions_all(best_minus_elt,minus_sarray);
- debug(Elt_dump(best_minus_elt));
- for (i = 0; i < best_minus_elt->npositions; i++) {
- left = best_minus_elt->positions[i];
- if (left > chrhigh) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* *chrhigh += 1U; */
- }
- if (extend_rightward(/*goal*/left,chroffset,chrhigh,/*rightward_set*/minus_set,
- query_compress_rev,minus_sarray,/*plusp*/false,genestrand,first_read_p,
- best_minus_elt->queryend) == true) {
- collect_elt_matches(&(*found_score),&(*subs),&(*indels),&(*ambiguous),&(*singlesplicing),&(*doublesplicing),
- best_minus_elt->querystart,best_minus_elt->queryend,
- chrnum,chroffset,chrhigh,chrlength,
- /*goal*/left,/*rightward_set*/minus_set,/*leftward_set*/NULL,
- querylength,query_compress_rev,/*plusp*/false,genestrand,first_read_p,
- nmisses_allowed);
- }
+
+ debug13(printf("Starting subdivisions on left side\n"));
+ low = subtract_bounded(goal,/*minusterm*/overall_max_distance,chroffset);
+ high = add_bounded(goal,/*plusterm*/max_insertionlen,chrhigh);
+ i = best_i - 1;
+ while (i >= 0) {
+ elt = (Elt_T) elt_tree[i]->first;
+ debug13(printf("Elt #%d at %d..%d has %d matching positions\n",i,elt->querystart,elt->queryend,elt->npositions));
+
+ if (elt->npositions > 0) {
+ low = subtract_bounded(elt->positions[0],/*minusterm*/overall_max_distance,chroffset);
+ high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/max_insertionlen,chrhigh);
+ --i;
+
+ } else {
+ j = i;
+ queryend = elt->queryend_leftward;
+ while (j - 1 >= 0 && ((Elt_T) elt_tree[j-1]->first)->npositions <= 0) {
+ j = j - 1;
+ }
+ elt = (Elt_T) elt_tree[j]->first;
+ querystart = elt->querystart_leftward;
+ debug13(printf("Elts from %d through %d have no matching positions\n",i,j));
+
+#if 0
+ nfound = 0;
+ /* B. Try subdividing elt using 16-mers every 8 */
+ debug13(printf("B. Try to subdivide elt region at %d..%d\n",querystart,queryend));
+ for (querypos = queryend - 16; querypos >= querystart; querypos -= 8) {
+ sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
+ /*querylength*/16,/*queryoffset*/querystart,
+ query_compress,sarray,plusp,genestrand,first_read_p,conversion);
+ elt = Elt_new(querystart,nmatches,initptr,finalptr,/*temporaryp*/true);
+ elt_tree[i] = List_push(elt_tree[i],(void *) elt);
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
+ nfound += elt->npositions;
+ debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
+ }
+
+ if (nfound == 0) {
+ /* C. Try subdividing elt using 16-mers every 1 */
+ debug13(printf("C. Try to subdivide elt region at %d..%d\n",querystart,queryend));
+ for (querypos = queryend - 16; querypos >= querystart; querypos -= 1) {
+ sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+ /*querylength*/16,/*queryoffset*/querypos,
+ query_compress,sarray,plusp,genestrand,first_read_p,conversion);
+ elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
+ elt_tree[i] = List_push(elt_tree[i],(void *) elt);
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
+ nfound += elt->npositions;
+ debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
}
}
- } else {
- /* Second (right) minus elt is best */
- debug(printf("Second minus elt %p is best:\n",elt));
- minus_set = List_push(NULL,best_minus_elt);
- best_minus_elt = elt;
- best_minus_nmatches = nmatches;
- if (minus_querypos >= querylength) {
- chrhigh = 0U;
- Elt_fill_positions_all(best_minus_elt,minus_sarray);
- debug(Elt_dump(best_minus_elt));
- for (i = 0; i < best_minus_elt->npositions; i++) {
- left = best_minus_elt->positions[i];
- if (left > chrhigh) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* *chrhigh += 1U; */
- }
- nmatches = Genome_consecutive_matches_leftward(query_compress_rev,left,
- /*pos5*/0,/*pos3*/best_minus_elt->querystart,
- /*plusp*/false,genestrand,first_read_p);
- debug(printf(" extending bestelt querystart %d leftward by %d matches\n",best_minus_elt->querystart,nmatches));
- best_minus_elt->querystart -= nmatches;
- if (extend_leftward(/*goal*/left,chroffset,chrhigh,/*leftward_set*/minus_set,
- /*queryptr*/queryrc,query_compress_rev,
- minus_sarray,/*plusp*/false,genestrand,first_read_p,minus_conversion,
- best_minus_elt->querystart,best_minus_elt->queryend) == true) {
- collect_elt_matches(&(*found_score),&(*subs),&(*indels),&(*ambiguous),&(*singlesplicing),&(*doublesplicing),
- best_minus_elt->querystart,best_minus_elt->queryend,
- chrnum,chroffset,chrhigh,chrlength,
- /*goal*/left,/*rightward_set*/NULL,/*leftward_set*/minus_set,
- querylength,query_compress_rev,/*plusp*/false,genestrand,first_read_p,
- nmisses_allowed);
- }
- best_minus_elt->querystart += nmatches;
+
+ if (nfound == 0) {
+ /* D. Try subdividing elt using 8-mers every 1 */
+ debug13(printf("D. Try to subdivide elt region at %d..%d\n",querystart,queryend));
+ for (querypos = queryend - 8; querypos >= querystart; querypos -= 1) {
+ sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+ /*querylength*/8,/*queryoffset*/querypos,
+ query_compress,sarray,plusp,genestrand,first_read_p,conversion);
+ elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
+ elt_tree[i] = List_push(elt_tree[i],(void *) elt);
+ Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,first_read_p);
+ nfound += elt->npositions;
+ debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
}
}
+
+#else
+
+ mappingstart = low + querystart;
+ mappingend = high + queryend;
+ chrstart = mappingstart - chroffset;
+ chrend = mappingend - chroffset;
+
+ Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,/*plusp:true*/true,
+ queryptr,querystart,queryend,/*chrpos*/chrstart,genestrand);
+ sub_diagonals = Oligoindex_get_mappings(NULL,coveredp,mappings,npositions,&totalpositions,
+ &oned_matrix_p,&maxnconsecutive,oligoindices_minor,oligoindex,
+ queryptr,querystart,queryend,querylength,
+ chrstart,chrend,chroffset,chrhigh,/*plusp:true*/true,diagpool);
+ Oligoindex_untally(oligoindex,queryptr,querylength);
+
+ debug14(printf("Got %d sub diagonals\n",List_length(sub_diagonals)));
+ for (p = sub_diagonals; p != NULL; p = List_next(p)) {
+ sub_diagonal = (Diag_T) List_head(p);
+ debug14(printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend + indexsize - 1,chrstart + sub_diagonal->diagonal));
+ elt = Elt_new_fillin(sub_diagonal->querystart,sub_diagonal->queryend,indexsize,chroffset + chrstart + sub_diagonal->diagonal);
+ elt_tree[i] = List_push(elt_tree[i],(void *) elt);
+ }
+#endif
+
+ i = j - 1;
}
}
+#endif
- debug(printf("Found %d subs, %d indels, %d singlesplices, %d doublesplices\n",
- List_length(*subs),List_length(*indels),List_length(*singlesplicing),List_length(*doublesplicing)));
- debug(printf("found_score %d vs querylength %d\n",*found_score,querylength));
- if (*found_score < querylength) {
- /* Be satisfied with a two-part alignment */
- if (plus_set != NULL) {
- elt = List_head(plus_set);
- Elt_free(&elt);
- List_free(&plus_set);
- }
- Elt_free(&best_plus_elt);
- if (minus_set != NULL) {
- elt = List_head(minus_set);
- Elt_free(&elt);
- List_free(&minus_set);
- }
- Elt_free(&best_minus_elt);
- return;
+ /* Create diagonals. We give a bonus of +1 for being on the same
+ diagonal. This means that we should count consecutive regions
+ within each diagonal as 2 points. Then an indel or gap will
+ give only 1 point, or a relative penalty. */
+ assert(List_length(elt_tree[best_i]) == 1);
+ elt = (Elt_T) elt_tree[best_i]->first;
+ /* Don't use leftward values */
+ *middle_diagonal = Univdiag_new(elt->querystart,elt->queryend,/*univdiagonal*/goal);
+ (*middle_diagonal)->intscore = 2*(elt->queryend - elt->querystart + 1);
+ debug13(printf("Creating middle diagonal: query %d..%d, diagonal %u = goal %u - chroffset %u\n",
+ elt->querystart,elt->queryend,goal - chroffset,goal,chroffset));
+ if (elt->temporaryp == true) {
+ Elt_free(&elt);
} else {
- plus_set = List_push(plus_set,best_plus_elt);
- minus_set = List_push(minus_set,best_minus_elt);
+ Elt_reset(elt);
+ }
+ List_free(&(elt_tree[best_i]));
-#if 0
- /* Checking middle of read above */
- halfwaypos = querylength/2;
- if (best_plus_nmatches < halfwaypos) {
- /* Start from middle of read */
- debug(printf("Starting from halfway point on plus\n"));
- sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryuc_ptr[halfwaypos]),
- querylength - halfwaypos,/*queryoffset*/halfwaypos,
- query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,first_read_p,plus_conversion);
- elt = Elt_new(halfwaypos,nmatches,initptr,finalptr);
- if (nmatches > best_plus_nmatches) {
- best_plus_elt = elt;
- best_plus_nmatches = nmatches;
- }
- plus_set = List_push(plus_set,elt);
- }
- if (best_minus_nmatches < halfwaypos) {
- /* Start from middle of read */
- debug(printf("Starting from halfway point on minus\n"));
- sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryrc[halfwaypos]),
- querylength - halfwaypos,/*queryoffset*/halfwaypos,
- query_compress_rev,minus_sarray,/*plusp*/false,genestrand,first_read_p,minus_conversion);
- elt = Elt_new(halfwaypos,nmatches,initptr,finalptr);
- if (nmatches > best_minus_nmatches) {
- best_minus_elt = elt;
- best_minus_nmatches = nmatches;
+ *all_right_diagonals = (List_T) NULL;
+ for (i = nelts - 1; i > best_i; --i) { /* Go in this order to avoid reversing list at the end */
+ for (p = elt_tree[i]; p != NULL; p = List_next(p)) {
+ elt = (Elt_T) p->first;
+ if (elt->fillin_p == true) {
+ /* Created by oligoindex */
+ diagonal = Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,/*univdiagonal*/elt->positions[0]);
+ diagonal->nmismatches_known_p = false;
+ *all_right_diagonals = List_push(*all_right_diagonals,(void *) diagonal);
+ } else if (elt->querystart_leftward < elt->queryend_leftward) {
+ for (j = elt->npositions - 1; j >= 0; --j) { /* Go in this order to avoid reversing list at the end */
+ debug13(printf("Creating right diagonal: query %d..%d (leftward %d..%d), diagonal %u\n",
+ elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset));
+ *all_right_diagonals = List_push(*all_right_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,
+ /*univdiagonal*/elt->positions[j]));
+ }
+ }
+ if (elt->temporaryp == true) {
+ Elt_free(&elt);
+ } else {
+ Elt_reset(elt);
}
- minus_set = List_push(minus_set,elt);
}
-#endif
+ List_free(&(elt_tree[i]));
}
- plus_niter = minus_niter = 2;
- /* Both sides have failed and we don't have a good best hit. Use up given allotment of attempts. */
- while (plus_querypos < querylength && plus_niter < nmisses_allowed) {
- sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryuc_ptr[plus_querypos]),
- querylength - plus_querypos,/*queryoffset*/plus_querypos,
- query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,first_read_p,plus_conversion);
- elt = Elt_new(plus_querypos,nmatches,initptr,finalptr);
- plus_set = List_push(plus_set,(void *) elt);
- if (nmatches > best_plus_nmatches) {
- best_plus_elt = elt;
- best_plus_nmatches = nmatches;
+ *all_left_diagonals = (List_T) NULL;
+ for (i = 0; i < best_i; i++) { /* Go in this order to avoid reversing list at the end */
+ for (p = elt_tree[i]; p != NULL; p = List_next(p)) {
+ elt = (Elt_T) p->first;
+ if (elt->fillin_p == true) {
+ /* Created by oligoindex */
+ diagonal = Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,/*univdiagonal*/elt->positions[0]);
+ diagonal->nmismatches_known_p = false; /* Signifies that we don't know the number of mismatches */
+ *all_left_diagonals = List_push(*all_left_diagonals,(void *) diagonal);
+ } else if (elt->querystart_leftward < elt->queryend_leftward) {
+ for (j = 0; j < elt->npositions; j++) { /* Go in this order to avoid reversing list at the end */
+ debug13(printf("Creating left diagonal: query %d..%d (leftward %d..%d), diagonal %u\n",
+ elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset));
+ *all_left_diagonals = List_push(*all_left_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,
+ /*univdiagonal*/elt->positions[j]));
+ }
+ }
+ if (elt->temporaryp == true) {
+ Elt_free(&elt);
+ } else {
+ Elt_reset(elt);
+ }
+ }
+ List_free(&(elt_tree[i]));
+ }
+
+ FREE(elt_tree);
+
+
+
+ /* A. Compute right diagonals */
+ /* A1. Scoring for dynamic programming */
+ diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,*all_right_diagonals);
+#ifdef DEBUG12
+ printf("Right side before sorting\n");
+ for (i = 0; i < ndiagonals; i++) {
+ diagonal = diagonal_array[i];
+ printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->diagonal);
+ }
+#endif
+
+ /* TODO: May be able to skip this sorting step */
+ qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_ascending_cmp);
+#ifdef DEBUG12
+ printf("Right side after sorting\n");
+ for (i = 0; i < ndiagonals; i++) {
+ diagonal = diagonal_array[i];
+ printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
+ }
+#endif
+
+
+ for (i = 0; i < ndiagonals; i++) {
+ diagonal = diagonal_array[i];
+ debug13(printf("%d: %d..%d at %u\n",i,diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+
+ low = subtract_bounded(diagonal->univdiagonal,overall_max_distance,chroffset);
+ high = add_bounded(diagonal->univdiagonal,max_insertionlen,chrhigh);
+ querypos = diagonal->querystart;
+ best_score = 0;
+
+ for (j = i - 1; j >= 0; --j) {
+ prev_diagonal = diagonal_array[j];
+ debug13(printf(" %d: %d..%d at %u ",j,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
+
+ if (prev_diagonal->queryend >= querypos) {
+ debug13(printf("Skipping because queryend %d >= querypos %d\n",prev_diagonal->queryend,querypos));
+ } else if (prev_diagonal->univdiagonal < low) {
+ debug13(printf("Skipping because diagonal %u < low_chrpos %u\n",prev_diagonal->univdiagonal,low));
+ } else if (prev_diagonal->univdiagonal > high) {
+ debug13(printf("Skipping because diagonal %u > high_chrpos %u\n",prev_diagonal->univdiagonal,high));
+ } else {
+ score = prev_diagonal->intscore;
+ if (prev_diagonal->univdiagonal == diagonal->univdiagonal) {
+ score += 1;
+ }
+ if (score <= best_score) {
+ debug13(printf("Skipping because score %d <= best_score %d\n",score,best_score));
+ } else {
+ best_score = score;
+ diagonal->prev = prev_diagonal;
+ debug13(printf("Updating best score to be %d. Prev diagonal is %d..%d at %u\n",
+ best_score,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
+ }
+ }
+ }
+
+ /* Handle links to middle diagonal */
+ prev_diagonal = *middle_diagonal;
+ debug13(printf(" Middle: %d..%d at %u ",prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
+ if (prev_diagonal->queryend >= querypos) {
+ debug13(printf("Skipping because queryend %d >= querypos %d\n",prev_diagonal->queryend,querypos));
+ } else if (prev_diagonal->univdiagonal < low) {
+ debug13(printf("Skipping because diagonal %u < low_chrpos %u\n",prev_diagonal->univdiagonal,low));
+ } else if (prev_diagonal->univdiagonal > high) {
+ debug13(printf("Skipping because diagonal %u > high_chrpos %u\n",prev_diagonal->univdiagonal,high));
+ } else {
+ score = prev_diagonal->intscore;
+ if (prev_diagonal->univdiagonal == diagonal->univdiagonal) {
+ score += 1; /* This bonus means we should double count contiguous region within each segment */
+ }
+ if (score <= best_score) {
+ debug13(printf("Skipping because score %d <= best_score %d\n",score,best_score));
+ } else {
+ best_score = score;
+ /* diagonal->prev = (Univdiag_T) NULL; */
+ debug13(printf("Updating best score (for link to middle diagonal) to be %d\n",best_score));
+ }
+ }
+
+ diagonal->intscore = best_score + 2*diagonal->nconsecutive;
+ debug13(printf("Right diagonal %d..%d at %u gets score %d\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->intscore));
+ }
+ FREE(diagonal_array);
+
+
+ /* A2. Optimizing for dynamic programming */
+ best_score_right = 0;
+ *best_right_diagonals = (List_T) NULL;
+ for (p = *all_right_diagonals; p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ if (diagonal->intscore > best_score_right) {
+ best_score_right = diagonal->intscore;
+ List_free(&(*best_right_diagonals));
+ *best_right_diagonals = List_push(NULL,(void *) diagonal);
+ } else if (diagonal->intscore == best_score_right) {
+ *best_right_diagonals = List_push(*best_right_diagonals,(void *) diagonal);
+ }
+ }
+
+
+ /* C. Compute left diagonals */
+ /* C1. Scoring for dynamic programming */
+ diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,*all_left_diagonals);
+#ifdef DEBUG12
+ printf("Left side before sorting\n");
+ for (i = 0; i < ndiagonals; i++) {
+ diagonal = diagonal_array[i];
+ printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
+ }
+#endif
+
+ /* TODO: May be able to skip this sorting step */
+ qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_descending_cmp);
+#ifdef DEBUG12
+ printf("Left side after sorting\n");
+ for (i = 0; i < ndiagonals; i++) {
+ diagonal = diagonal_array[i];
+ printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->diagonal);
+ }
+#endif
+
+ for (i = 0; i < ndiagonals; i++) {
+ diagonal = diagonal_array[i];
+ debug13(printf("%d: %d..%d at %u\n",i,diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+
+ low = subtract_bounded(diagonal->univdiagonal,max_insertionlen,chroffset);
+ high = add_bounded(diagonal->univdiagonal,overall_max_distance,chrhigh);
+ querypos = diagonal->queryend;
+ best_score = 0;
+
+ for (j = i - 1; j >= 0; --j) {
+ prev_diagonal = diagonal_array[j];
+ debug13(printf(" %d: %d..%d at %u ",j,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
+
+ if (prev_diagonal->querystart <= querypos) {
+ debug13(printf("Skipping because querystart %d <= querypos %d\n",prev_diagonal->querystart,querypos));
+ } else if (prev_diagonal->univdiagonal < low) {
+ debug13(printf("Skipping because diagonal %u < low %u\n",prev_diagonal->univdiagonal,low));
+ } else if (prev_diagonal->univdiagonal > high) {
+ debug13(printf("Skipping because diagonal %u > high %u\n",prev_diagonal->univdiagonal,high));
+ } else {
+ score = prev_diagonal->intscore;
+ if (prev_diagonal->univdiagonal == diagonal->univdiagonal) {
+ score += 1;
+ }
+ if (score <= best_score) {
+ debug13(printf("Skipping because score %d <= best_score %d\n",score,best_score));
+ } else {
+ best_score = score;
+ diagonal->prev = prev_diagonal;
+ debug13(printf("Updating best score to be %d. Prev diagonal is %d..%d at %u\n",
+ best_score,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
+ }
+ }
+ }
+
+ /* Handle links to middle diagonal */
+ prev_diagonal = *middle_diagonal;
+ debug13(printf(" Middle: %d..%d at %u ",prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
+ if (prev_diagonal->querystart <= querypos) {
+ debug13(printf("Skipping because querystart %d <= querypos %d\n",prev_diagonal->querystart,querypos));
+ } else if (prev_diagonal->univdiagonal < low) {
+ debug13(printf("Skipping because diagonal %u < low_chrpos %u\n",prev_diagonal->univdiagonal,low));
+ } else if (prev_diagonal->univdiagonal > high) {
+ debug13(printf("Skipping because diagonal %u > high_chrpos %u\n",prev_diagonal->univdiagonal,high));
+ } else {
+ score = prev_diagonal->intscore;
+ if (prev_diagonal->univdiagonal == diagonal->univdiagonal) {
+ score += 1; /* This bonus means we should double count contiguous region within each segment */
+ }
+ if (score <= best_score) {
+ debug13(printf("Skipping because score %d <= best_score %d\n",prev_diagonal->intscore,best_score));
+ } else {
+ best_score = score;
+ /* diagonal->prev = (Univdiag_T) NULL; */
+ debug13(printf("Updating best score (for link to middle diagonal) to be %d\n",best_score));
+ }
+ }
+
+ diagonal->intscore = best_score + 2*diagonal->nconsecutive;
+ debug13(printf("Left diagonal %d..%d at %u gets score %d\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->intscore));
+ }
+ FREE(diagonal_array);
+
+
+ /* C2. Optimizing for dynamic programming */
+ best_score_left = 0;
+ *best_left_diagonals = (List_T) NULL;
+ for (p = *all_left_diagonals; p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ if (diagonal->intscore > best_score_left) {
+ best_score_left = diagonal->intscore;
+ List_free(&(*best_left_diagonals));
+ *best_left_diagonals = List_push(NULL,(void *) diagonal);
+ } else if (diagonal->intscore == best_score_left) {
+ *best_left_diagonals = List_push(*best_left_diagonals,(void *) diagonal);
+ }
+ }
+
+#if 0
+ printf("Best on the left\n");
+ for (p = *best_left_diagonals; p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ printf("Score %d: %d..%d at %u\n",diagonal->intscore,diagonal->querystart,diagonal->queryend,diagonal->diagonal);
+ }
+#endif
+
+
+ if (best_score_left == 0 && best_score_right == 0) {
+ return (*middle_diagonal)->intscore;
+ } else if (best_score_left == 0) {
+ return best_score_right;
+ } else if (best_score_right == 0) {
+ return best_score_left;
+ } else {
+ /* middle_diagonal score is double counted */
+ return best_score_left + best_score_right - (*middle_diagonal)->intscore;
+ }
+}
+
+
+/* find_best_path: builds the "middle path" for one alignment candidate.
+ *
+ * Starting from middle_diagonal, this routine traces back through the ->prev
+ * links of the best-scoring right diagonals and the best-scoring left
+ * diagonals and pushes onto the returned list, in this order: an optional
+ * fill-in diagonal for the right end, an optional right indel diagonal, the
+ * right diagonals common to all best right paths, middle_diagonal, the left
+ * diagonals common to all best left paths, an optional left indel diagonal,
+ * and an optional fill-in diagonal for the left end.
+ *
+ * When one side has more than one best diagonal, the divergent (non-shared)
+ * part of each best path is returned in *right_paths or *left_paths (one
+ * List_T of Univdiag_T per best diagonal).  If one of the divergent paths
+ * looks like an indel relative to the common diagonal, that diagonal is
+ * taken into the middle path.  Otherwise each divergent path is tested with
+ * Splice_resolve_sense and Splice_resolve_antisense, and every successful
+ * result (return value >= 0) is recorded in the corresponding *_sense or
+ * *_antisense output lists (endpoints, queryends/querystarts, ambcoords,
+ * amb_knowni, amb_nmismatchesi/j, amb_probsi/j).
+ *
+ * All output lists are reset to NULL on entry.  *fillin_diagonals receives
+ * the diagonals created here by Univdiag_new_fillin (only possible when
+ * SUBDIVIDE_ENDS is defined); presumably the caller is responsible for
+ * freeing them -- TODO confirm.
+ *
+ * NOTE(review): the ->nlinked counters of the traversed diagonals are
+ * incremented and not reset here; this assumes they are zero on entry --
+ * TODO confirm against the code that creates the diagonals.
+ *
+ * Returns the middle path as a List_T of Univdiag_T. */
+static List_T
+find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T *right_endpoints_antisense,
+ Intlist_T *right_queryends_sense, Intlist_T *right_queryends_antisense,
+ Uintlist_T *right_ambcoords_sense, Uintlist_T *right_ambcoords_antisense,
+ Intlist_T *right_amb_knowni_sense, Intlist_T *right_amb_knowni_antisense,
+ Intlist_T *right_amb_nmismatchesi_sense, Intlist_T *right_amb_nmismatchesi_antisense,
+ Intlist_T *right_amb_nmismatchesj_sense, Intlist_T *right_amb_nmismatchesj_antisense,
+ Doublelist_T *right_amb_probsi_sense, Doublelist_T *right_amb_probsi_antisense,
+ Doublelist_T *right_amb_probsj_sense, Doublelist_T *right_amb_probsj_antisense,
+
+ List_T *left_paths, Intlist_T *left_endpoints_sense, Intlist_T *left_endpoints_antisense,
+ Intlist_T *left_querystarts_sense, Intlist_T *left_querystarts_antisense,
+ Uintlist_T *left_ambcoords_sense, Uintlist_T *left_ambcoords_antisense,
+ Intlist_T *left_amb_knowni_sense, Intlist_T *left_amb_knowni_antisense,
+ Intlist_T *left_amb_nmismatchesi_sense, Intlist_T *left_amb_nmismatchesi_antisense,
+ Intlist_T *left_amb_nmismatchesj_sense, Intlist_T *left_amb_nmismatchesj_antisense,
+ Doublelist_T *left_amb_probsi_sense, Doublelist_T *left_amb_probsi_antisense,
+ Doublelist_T *left_amb_probsj_sense, Doublelist_T *left_amb_probsj_antisense,
+
+ List_T *fillin_diagonals,
+
+ Univdiag_T middle_diagonal, List_T best_right_diagonals, List_T best_left_diagonals,
+
+ char *queryptr, int querylength, Compress_T query_compress,
+ Univcoord_T chroffset, Univcoord_T chrhigh,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, bool plusp, int genestrand,
+ bool first_read_p) {
+ List_T middle_path;
+ List_T p;
+
+ List_T diagonal_path, ambig_path;
+ Univdiag_T diagonal, common_diagonal, prev_diagonal, right_indel_diagonal = NULL, left_indel_diagonal = NULL;
+ Diag_T sub_diagonal;
+ int nbest;
+
+ List_T sub_diagonals;
+ int querystart, queryend;
+ bool *coveredp;
+ Univcoord_T mappingstart, mappingend, left, prev_left;
+ Chrpos_T **mappings, chrstart, chrend;
+ int *npositions, totalpositions = 0;
+ int maxnconsecutive = 0;
+ Oligoindex_T oligoindex;
+ bool oned_matrix_p;
+ int indexsize;
+
+ int max_mismatches_allowed;
+ int splice_pos;
+ int best_knowni_i, best_knowni_j, best_nmismatches_i, best_nmismatches_j;
+ double best_prob_i, best_prob_j;
+
+ /* Known splice sites overlapping segment i (the diagonal nearer the query
+ start) and segment j (the diagonal nearer the query end). Each knownpos
+ array is terminated below by a sentinel of querylength + 100. */
+ int segmenti_donor_nknown, segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
+#ifdef HAVE_ALLOCA
+ int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+#else
+ int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1],
+ segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1];
+ int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1],
+ segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1];
+#endif
+
+ int j;
+
+ debug13(printf("***Entered find_best_path\n"));
+
+ /* Work areas for Oligoindex_get_mappings (used only under SUBDIVIDE_ENDS) */
+ coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
+ mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
+ npositions = (int *) CALLOCA(querylength,sizeof(int));
+ oligoindex = Oligoindex_array_elt(oligoindices_minor,/*source*/0);
+ indexsize = Oligoindex_indexsize(oligoindex);
+
+
+ /* A3. Traceback for dynamic programming */
+ *right_endpoints_sense = *right_endpoints_antisense = (Intlist_T) NULL;
+ *right_queryends_sense = *right_queryends_antisense = (Intlist_T) NULL;
+ *right_ambcoords_sense = *right_ambcoords_antisense = (Uintlist_T) NULL;
+ *right_amb_knowni_sense = *right_amb_knowni_antisense = (Intlist_T) NULL;
+ *right_amb_nmismatchesi_sense = *right_amb_nmismatchesi_antisense = (Intlist_T) NULL;
+ *right_amb_nmismatchesj_sense = *right_amb_nmismatchesj_antisense = (Intlist_T) NULL;
+ *right_amb_probsi_sense = *right_amb_probsi_antisense = (Doublelist_T) NULL;
+ *right_amb_probsj_sense = *right_amb_probsj_antisense = (Doublelist_T) NULL;
+
+ *right_paths = (List_T) NULL;
+ if ((nbest = List_length(best_right_diagonals)) == 0) {
+ /* No right diagonals: the right end begins just past the middle diagonal */
+ common_diagonal = (Univdiag_T) NULL;
+
+ querystart = middle_diagonal->queryend + 1;
+ left = middle_diagonal->univdiagonal;
+
+ } else if (nbest == 1) {
+ /* A single best right diagonal: its whole ->prev chain is unambiguous */
+ common_diagonal = (Univdiag_T) List_head(best_right_diagonals);
+
+ querystart = common_diagonal->queryend + 1;
+ left = common_diagonal->univdiagonal;
+
+ } else {
+ debug13(printf("Multiple (%d) best right diagonals\n",nbest));
+
+ /* Distinguish between common and divergent diagonals */
+ /* A diagonal reached from every best diagonal ends up with nlinked == nbest */
+ for (p = best_right_diagonals; p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ while (diagonal != NULL) {
+ diagonal->nlinked += 1;
+ diagonal = diagonal->prev;
+ }
+ }
+
+ /* Handle divergent diagonals */
+ /* Now that we are running oligoindex, we may need to obtain only the last common_diagonal */
+ for (p = best_right_diagonals; p != NULL; p = List_next(p)) {
+ ambig_path = (List_T) NULL;
+ diagonal = (Univdiag_T) List_head(p);
+ while (diagonal != NULL && diagonal->nlinked < nbest) {
+ ambig_path = List_push(ambig_path,(void *) diagonal);
+ diagonal = diagonal->prev;
+ }
+ *right_paths = List_push(*right_paths,(void *) ambig_path);
+
+ common_diagonal = diagonal; /* Last elt on prev path. Save for later */
+ }
+
+ if (common_diagonal == NULL) {
+ /* All paths connect directly to the middle diagonal, so there is no common diagonal */
+ prev_diagonal = middle_diagonal;
+ querystart = middle_diagonal->queryend + 1;
+ prev_left = middle_diagonal->univdiagonal;
+ } else {
+ prev_diagonal = common_diagonal;
+ querystart = common_diagonal->queryend + 1;
+ prev_left = common_diagonal->univdiagonal;
+ }
+
+ /* Distinguish right paths by looking for indel (which wins) or splicing */
+ debug13(printf("Have %d right_paths\n",List_length(*right_paths)));
+ for (p = *right_paths; p != NULL; p = List_next(p)) {
+ ambig_path = (List_T) List_head(p);
+ diagonal = (Univdiag_T) List_head(ambig_path);
+ left = diagonal->univdiagonal;
+ if (left < prev_left) {
+ /* Insertion */
+ right_indel_diagonal = diagonal;
+ } else if (left - prev_left < MIN_INTRONLEN) {
+ /* Deletion */
+ /* Here left >= prev_left. The coordinates appear to be unsigned (printed with %u),
+ so the gap must be computed as left - prev_left; prev_left - left would wrap around. */
+ right_indel_diagonal = diagonal;
+ }
+ }
+
+ if (right_indel_diagonal != NULL) {
+ /* Push onto middle path later */
+ querystart = right_indel_diagonal->queryend + 1;
+ left = right_indel_diagonal->univdiagonal;
+
+ } else {
+ /* No indel: try each divergent path as a splice from prev_diagonal (segment i) to diagonal (segment j) */
+ for (p = *right_paths; p != NULL; p = List_next(p)) {
+ ambig_path = (List_T) List_head(p);
+ diagonal = (Univdiag_T) List_head(ambig_path);
+ left = diagonal->univdiagonal;
+
+ /* Collect known donor/antiacceptor sites within the read placed at prev_left */
+ segmenti_donor_nknown = segmenti_antiacceptor_nknown = 0;
+ if (nsplicesites > 0 &&
+ Splicetrie_splicesite_p(prev_left,/*pos5*/1,/*pos3*/querylength) == true) {
+ j = binary_search(0,nsplicesites,splicesites,prev_left);
+ while (j < nsplicesites && splicesites[j] < prev_left + querylength) {
+ if (splicetypes[j] == DONOR) {
+ debug4s(printf("Setting known donor %d for segmenti at %u\n",j,splicesites[j]));
+ segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - prev_left;
+ segmenti_donor_knowni[segmenti_donor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIACCEPTOR) {
+ debug4s(printf("Setting known antiacceptor %d for segmenti at %u\n",j,splicesites[j]));
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - prev_left;
+ segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
+ }
+ j++;
+ }
+ }
+ /* Sentinels marking the end of each known-position array */
+ segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
+
+ /* Collect known acceptor/antidonor sites within the read placed at left */
+ segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
+ if (nsplicesites > 0 &&
+ Splicetrie_splicesite_p(left,/*pos5*/1,/*pos3*/querylength) == true) {
+ j = binary_search(0,nsplicesites,splicesites,left);
+ while (j < nsplicesites && splicesites[j] < left + querylength) {
+ if (splicetypes[j] == ACCEPTOR) {
+ debug4s(printf("Setting known acceptor %d for segmentj at %u\n",j,splicesites[j]));
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - left;
+ segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIDONOR) {
+ debug4s(printf("Setting known antidonor %d for segmentj at %u\n",j,splicesites[j]));
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - left;
+ segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
+ }
+ j++;
+ }
+ }
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
+
+ /* Allow one mismatch per query position in the gap between the two diagonals,
+ plus one for each diagonal that has a positive score */
+ max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
+ debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
+ if (prev_diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+ if (diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+
+ if ((splice_pos = Splice_resolve_sense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
+ &best_prob_i,&best_prob_j,
+ /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
+ prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ plusp,genestrand,first_read_p)) >= 0) {
+ /* On the right side, record the far (segment j) coordinate and known-site index */
+ *right_endpoints_sense = Intlist_push(*right_endpoints_sense,splice_pos);
+ *right_queryends_sense = Intlist_push(*right_queryends_sense,diagonal->queryend + 1);
+ *right_ambcoords_sense = Uintlist_push(*right_ambcoords_sense,left + splice_pos);
+ *right_amb_knowni_sense = Intlist_push(*right_amb_knowni_sense,best_knowni_j);
+ *right_amb_nmismatchesi_sense = Intlist_push(*right_amb_nmismatchesi_sense,best_nmismatches_i);
+ *right_amb_nmismatchesj_sense = Intlist_push(*right_amb_nmismatchesj_sense,best_nmismatches_j);
+ *right_amb_probsi_sense = Doublelist_push(*right_amb_probsi_sense,best_prob_i);
+ *right_amb_probsj_sense = Doublelist_push(*right_amb_probsj_sense,best_prob_j);
+ }
+
+ if ((splice_pos = Splice_resolve_antisense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
+ &best_prob_i,&best_prob_j,
+ /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
+ prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ plusp,genestrand,first_read_p)) >= 0) {
+ *right_endpoints_antisense = Intlist_push(*right_endpoints_antisense,splice_pos);
+ *right_queryends_antisense = Intlist_push(*right_queryends_antisense,diagonal->queryend + 1);
+ *right_ambcoords_antisense = Uintlist_push(*right_ambcoords_antisense,left + splice_pos);
+ *right_amb_knowni_antisense = Intlist_push(*right_amb_knowni_antisense,best_knowni_j);
+ *right_amb_nmismatchesi_antisense = Intlist_push(*right_amb_nmismatchesi_antisense,best_nmismatches_i);
+ *right_amb_nmismatchesj_antisense = Intlist_push(*right_amb_nmismatchesj_antisense,best_nmismatches_j);
+ *right_amb_probsi_antisense = Doublelist_push(*right_amb_probsi_antisense,best_prob_i);
+ *right_amb_probsj_antisense = Doublelist_push(*right_amb_probsj_antisense,best_prob_j);
+ }
+ }
+ }
+ }
+
+ sub_diagonals = (List_T) NULL;
+
+#ifdef SUBDIVIDE_ENDS
+ if (querystart + MIN_ENDLENGTH >= querylength) {
+ /* Remaining right end is too short to search */
+ } else {
+ /* Run oligoindex here to right of common_diagonal */
+ mappingstart = subtract_bounded(left + querystart,/*minusterm*/max_insertionlen,chroffset);
+ mappingend = add_bounded(left + querylength,/*plusterm*/overall_max_distance,chrhigh);
+ chrstart = mappingstart - chroffset;
+ chrend = mappingend - chroffset;
+
+ Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,/*plusp:true*/true,
+ queryptr,querystart,/*queryend*/querylength,/*chrpos*/chrstart,genestrand);
+ sub_diagonals = Oligoindex_get_mappings(NULL,coveredp,mappings,npositions,&totalpositions,
+ &oned_matrix_p,&maxnconsecutive,oligoindices_minor,oligoindex,
+ queryptr,querystart,/*queryend*/querylength,querylength,
+ chrstart,chrend,chroffset,chrhigh,/*plusp:true*/true,diagpool);
+ Oligoindex_untally(oligoindex,queryptr,querylength);
+
+ debug14(printf("Got %d sub diagonals\n",List_length(sub_diagonals)));
+#ifdef DEBUG14
+ for (p = sub_diagonals; p != NULL; p = List_next(p)) {
+ sub_diagonal = (Diag_T) List_head(p);
+ /* Need to alter oligoindex diagonal for our needs */
+ printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend + indexsize - 1,chrstart + sub_diagonal->diagonal);
+ }
+#endif
+
+#if 0
+ /* Perform dynamic programming on these diagonals */
+ for (p = sub_diagonals; p != NULL; p = List_next(p)) {
+ diagonal = List_head(p);
+ querypos = diagonal->querystart;
+ best_score = 0;
+
+ for (q = sub_diagonals; q != p; q = List_next(q)) {
+ prev_diagonal = List_head(q);
+ if (prev_diagonal->queryend >= querypos) {
+ debug13(printf("Skipping because queryend %d >= querypos %d\n",prev_diagonal->queryend,querypos));
+ } else if (prev_diagonal->univdiagonal < low) {
+ debug13(printf("Skipping because diagonal %u < low_chrpos %u\n",prev_diagonal->diagonal,low_chrpos));
+ } else if (prev_diagonal->diagonal > high_chrpos) {
+ debug13(printf("Skipping because diagonal %u > high_chrpos %u\n",prev_diagonal->diagonal,high_chrpos));
+ } else {
+ score = prev_diagonal->intscore;
+ if (prev_diagonal->diagonal == diagonal->diagonal) {
+ score += 1;
+ }
+ if (score <= best_score) {
+ debug13(printf("Skipping because score %d <= best_score %d\n",score,best_score));
+ } else {
+ best_score = score;
+ diagonal->prev = prev_diagonal;
+ debug13(printf("Updating best score to be %d. Prev diagonal is %d..%d at %u\n",
+ best_score,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->diagonal));
+ }
+ }
+ }
+ }
+#endif
+
+ }
+#endif /* SUBDIVIDE_ENDS */
+
+
+ *fillin_diagonals = (List_T) NULL;
+ middle_path = (List_T) NULL;
+
+ /* A4. Process oligoindex diagonals from right */
+ /* Only a unique sub-diagonal is used; zero or several are ignored */
+ if (List_length(sub_diagonals) == 0) {
+ /* Skip */
+ } else if (List_length(sub_diagonals) == 1) {
+ sub_diagonal = List_head(sub_diagonals);
+ diagonal = Univdiag_new_fillin(sub_diagonal->querystart,sub_diagonal->queryend,indexsize,
+ /*univdiagonal*/chroffset + chrstart + sub_diagonal->diagonal);
+ *fillin_diagonals = List_push(*fillin_diagonals,(void *) diagonal);
+ middle_path = List_push(middle_path,(void *) diagonal);
+ } else {
+#ifdef DEBUG13
+ printf("Have %d sub_diagonals\n",List_length(sub_diagonals));
+ for (p = sub_diagonals; p != NULL; p = List_next(p)) {
+ sub_diagonal = List_head(p);
+ printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend,chrstart + sub_diagonal->diagonal);
+ }
+#endif
+ }
+
+ if (right_indel_diagonal != NULL) {
+ debug13(printf("Pushing right indel diagonal onto middle: query %d..%d, diagonal %u\n",
+ right_indel_diagonal->querystart,right_indel_diagonal->queryend,right_indel_diagonal->univdiagonal));
+ middle_path = List_push(middle_path,(void *) right_indel_diagonal);
+ }
+
+ /* A5. Process common diagonal from right */
+ while (common_diagonal != NULL) {
+ middle_path = List_push(middle_path,(void *) common_diagonal);
+ debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
+ common_diagonal->querystart,common_diagonal->queryend,common_diagonal->univdiagonal));
+ common_diagonal = common_diagonal->prev;
+ }
+
+ /* B. Process original middle diagonal */
+ middle_path = List_push(middle_path,(void *) middle_diagonal);
+ debug13(printf("Pushing middle diagonal onto middle: query %d..%d, diagonal %u\n",
+ middle_diagonal->querystart,middle_diagonal->queryend,middle_diagonal->univdiagonal));
+
+
+ /* C3. Traceback for dynamic programming */
+ *left_endpoints_sense = *left_endpoints_antisense = (Intlist_T) NULL;
+ *left_querystarts_sense = *left_querystarts_antisense = (Intlist_T) NULL;
+ *left_ambcoords_sense = *left_ambcoords_antisense = (Uintlist_T) NULL;
+ *left_amb_knowni_sense = *left_amb_knowni_antisense = (Intlist_T) NULL;
+ *left_amb_nmismatchesi_sense = *left_amb_nmismatchesi_antisense = (Intlist_T) NULL;
+ *left_amb_nmismatchesj_sense = *left_amb_nmismatchesj_antisense = (Intlist_T) NULL;
+ *left_amb_probsi_sense = *left_amb_probsi_antisense = (Doublelist_T) NULL;
+ *left_amb_probsj_sense = *left_amb_probsj_antisense = (Doublelist_T) NULL;
+
+ *left_paths = (List_T) NULL;
+ debug13(printf("On left, have %d best_left_diagonals\n",List_length(best_left_diagonals)));
+ if ((nbest = List_length(best_left_diagonals)) == 0) {
+ /* No left diagonals: the left end stops at the start of the middle diagonal */
+ common_diagonal = (Univdiag_T) NULL;
+
+ queryend = middle_diagonal->querystart;
+ left = middle_diagonal->univdiagonal;
+
+ } else if (nbest == 1) {
+ /* A single best left diagonal: its whole ->prev chain is unambiguous */
+ common_diagonal = (Univdiag_T) List_head(best_left_diagonals);
+
+ queryend = common_diagonal->querystart;
+ left = common_diagonal->univdiagonal;
+
+ } else {
+ debug13(printf("Multiple (%d) best left diagonals\n",nbest));
+
+ /* Distinguish between common and divergent diagonals */
+ for (p = best_left_diagonals; p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ while (diagonal != NULL) {
+ diagonal->nlinked += 1;
+ diagonal = diagonal->prev;
+ }
+ }
+
+ /* Handle divergent diagonals */
+ /* Now that we are running oligoindex, we may need to obtain only the last common_diagonal */
+ for (p = best_left_diagonals; p != NULL; p = List_next(p)) {
+ ambig_path = (List_T) NULL;
+ diagonal = (Univdiag_T) List_head(p);
+ while (diagonal != NULL && diagonal->nlinked < nbest) {
+ ambig_path = List_push(ambig_path,(void *) diagonal);
+ diagonal = diagonal->prev;
+ }
+ *left_paths = List_push(*left_paths,(void *) ambig_path);
+
+ common_diagonal = diagonal; /* Last elt on prev path. Save for later */
+ }
+
+ /* On the left side the roles are mirrored: diagonal/left is the common (or middle)
+ diagonal and prev_diagonal/prev_left is the divergent one */
+ if (common_diagonal == NULL) {
+ /* All paths connect directly to the middle diagonal, so there is no common diagonal */
+ diagonal = middle_diagonal;
+ queryend = middle_diagonal->querystart;
+ left = middle_diagonal->univdiagonal;
+ } else {
+ diagonal = common_diagonal;
+ queryend = common_diagonal->querystart;
+ left = common_diagonal->univdiagonal;
+ }
+
+ /* Distinguish left paths by looking for indel (which wins) or splicing */
+ debug13(printf("Have %d left_paths\n",List_length(*left_paths)));
+ for (p = *left_paths; p != NULL; p = List_next(p)) {
+ ambig_path = (List_T) List_head(p);
+ prev_diagonal = (Univdiag_T) List_head(ambig_path);
+ prev_left = prev_diagonal->univdiagonal;
+ if (left < prev_left) {
+ /* Insertion */
+ left_indel_diagonal = prev_diagonal;
+ } else if (left - prev_left < MIN_INTRONLEN) {
+ /* Deletion */
+ /* Here left >= prev_left. The coordinates appear to be unsigned (printed with %u),
+ so the gap must be computed as left - prev_left; prev_left - left would wrap around. */
+ left_indel_diagonal = prev_diagonal;
+ }
+ }
+
+ if (left_indel_diagonal != NULL) {
+ /* Push onto middle path later */
+ left = left_indel_diagonal->univdiagonal;
+ queryend = left_indel_diagonal->querystart;
+
+ } else {
+ /* No indel: try each divergent path as a splice from prev_diagonal (segment i) to diagonal (segment j) */
+ for (p = *left_paths; p != NULL; p = List_next(p)) {
+ ambig_path = (List_T) List_head(p);
+ prev_diagonal = (Univdiag_T) List_head(ambig_path);
+ prev_left = prev_diagonal->univdiagonal;
+
+ /* Collect known donor/antiacceptor sites within the read placed at prev_left */
+ segmenti_donor_nknown = segmenti_antiacceptor_nknown = 0;
+ if (nsplicesites > 0 &&
+ Splicetrie_splicesite_p(prev_left,/*pos5*/1,/*pos3*/querylength) == true) {
+ j = binary_search(0,nsplicesites,splicesites,prev_left);
+ while (j < nsplicesites && splicesites[j] < prev_left + querylength) {
+ if (splicetypes[j] == DONOR) {
+ debug4s(printf("Setting known donor %d for segmenti at %u\n",j,splicesites[j]));
+ segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - prev_left;
+ segmenti_donor_knowni[segmenti_donor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIACCEPTOR) {
+ debug4s(printf("Setting known antiacceptor %d for segmenti at %u\n",j,splicesites[j]));
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - prev_left;
+ segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
+ }
+ j++;
+ }
+ }
+ /* Sentinels marking the end of each known-position array */
+ segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
+
+ /* Collect known acceptor/antidonor sites within the read placed at left */
+ segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
+ if (nsplicesites > 0 &&
+ Splicetrie_splicesite_p(left,/*pos5*/1,/*pos3*/querylength) == true) {
+ j = binary_search(0,nsplicesites,splicesites,left);
+ while (j < nsplicesites && splicesites[j] < left + querylength) {
+ if (splicetypes[j] == ACCEPTOR) {
+ debug4s(printf("Setting known acceptor %d for segmentj at %u\n",j,splicesites[j]));
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - left;
+ segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIDONOR) {
+ debug4s(printf("Setting known antidonor %d for segmentj at %u\n",j,splicesites[j]));
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - left;
+ segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
+ }
+ j++;
+ }
+ }
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
+
+ /* Allow one mismatch per query position in the gap between the two diagonals,
+ plus one for each diagonal that has a positive score */
+ max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
+ debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
+ if (prev_diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+ if (diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+
+ if ((splice_pos = Splice_resolve_sense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
+ &best_prob_i,&best_prob_j,
+ /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
+ prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ plusp,genestrand,first_read_p)) >= 0) {
+ /* On the left side, record the far (segment i) coordinate and known-site index */
+ *left_endpoints_sense = Intlist_push(*left_endpoints_sense,splice_pos);
+ *left_querystarts_sense = Intlist_push(*left_querystarts_sense,prev_diagonal->querystart);
+ *left_ambcoords_sense = Uintlist_push(*left_ambcoords_sense,prev_left + splice_pos);
+ *left_amb_knowni_sense = Intlist_push(*left_amb_knowni_sense,best_knowni_i);
+ *left_amb_nmismatchesi_sense = Intlist_push(*left_amb_nmismatchesi_sense,best_nmismatches_i);
+ *left_amb_nmismatchesj_sense = Intlist_push(*left_amb_nmismatchesj_sense,best_nmismatches_j);
+ *left_amb_probsi_sense = Doublelist_push(*left_amb_probsi_sense,best_prob_i);
+ *left_amb_probsj_sense = Doublelist_push(*left_amb_probsj_sense,best_prob_j);
+ }
+
+ if ((splice_pos = Splice_resolve_antisense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
+ &best_prob_i,&best_prob_j,
+ /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
+ prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ plusp,genestrand,first_read_p)) >= 0) {
+ *left_endpoints_antisense = Intlist_push(*left_endpoints_antisense,splice_pos);
+ *left_querystarts_antisense = Intlist_push(*left_querystarts_antisense,prev_diagonal->querystart);
+ *left_ambcoords_antisense = Uintlist_push(*left_ambcoords_antisense,prev_left + splice_pos);
+ *left_amb_knowni_antisense = Intlist_push(*left_amb_knowni_antisense,best_knowni_i);
+ *left_amb_nmismatchesi_antisense = Intlist_push(*left_amb_nmismatchesi_antisense,best_nmismatches_i);
+ *left_amb_nmismatchesj_antisense = Intlist_push(*left_amb_nmismatchesj_antisense,best_nmismatches_j);
+ *left_amb_probsi_antisense = Doublelist_push(*left_amb_probsi_antisense,best_prob_i);
+ *left_amb_probsj_antisense = Doublelist_push(*left_amb_probsj_antisense,best_prob_j);
+ }
+ }
+ }
+ }
+
+
+ sub_diagonals = (List_T) NULL;
+
+#ifdef SUBDIVIDE_ENDS
+ /* Run oligoindex here to left of common_diagonal */
+ if (queryend < MIN_ENDLENGTH) {
+ /* Remaining left end is too short to search */
+ } else {
+ mappingstart = subtract_bounded(left + 0,/*minusterm*/overall_max_distance,chroffset);
+ mappingend = add_bounded(left + queryend,/*plusterm*/max_insertionlen,chrhigh);
+ chrstart = mappingstart - chroffset;
+ chrend = mappingend - chroffset;
+
+ Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,/*plusp:true*/true,
+ queryptr,/*querystart*/0,queryend,/*chrpos*/chrstart,genestrand);
+ sub_diagonals = Oligoindex_get_mappings(NULL,coveredp,mappings,npositions,&totalpositions,
+ &oned_matrix_p,&maxnconsecutive,oligoindices_minor,oligoindex,
+ queryptr,/*querystart*/0,queryend,querylength,
+ chrstart,chrend,chroffset,chrhigh,/*plusp:true*/true,diagpool);
+ Oligoindex_untally(oligoindex,queryptr,querylength);
+
+ debug14(printf("Got %d sub diagonals\n",List_length(sub_diagonals)));
+#ifdef DEBUG14
+ for (p = sub_diagonals; p != NULL; p = List_next(p)) {
+ sub_diagonal = (Diag_T) List_head(p);
+ /* Need to alter oligoindex diagonal for our needs */
+ printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend + indexsize - 1,chrstart + sub_diagonal->diagonal);
+ }
+#endif
+ /* Need to perform dynamic programming on these diagonals, or select one */
+ }
+#endif /* SUBDIVIDE_ENDS */
+
+
+ diagonal_path = (List_T) NULL;
+
+ /* C5. Process left diagonals in reverse */
+ /* The ->prev chain is first collected on a temporary list so that the diagonals
+ can be pushed onto middle_path in the opposite order */
+ while (common_diagonal != NULL) {
+ diagonal_path = List_push(diagonal_path,(void *) common_diagonal);
+ common_diagonal = common_diagonal->prev;
+ }
+ /* Pops off in reverse */
+ for (p = diagonal_path; p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
+ diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+ middle_path = List_push(middle_path,(void *) diagonal);
+ }
+ List_free(&diagonal_path);
+
+
+ if (left_indel_diagonal != NULL) {
+ debug13(printf("Pushing left indel diagonal onto middle: query %d..%d, diagonal %u\n",
+ left_indel_diagonal->querystart,left_indel_diagonal->queryend,left_indel_diagonal->univdiagonal));
+ middle_path = List_push(middle_path,(void *) left_indel_diagonal);
+ }
+
+
+ /* C4. Process oligoindex diagonals from left */
+ /* Only a unique sub-diagonal is used; zero or several are ignored */
+ if (List_length(sub_diagonals) == 0) {
+ /* Skip */
+ } else if (List_length(sub_diagonals) == 1) {
+ sub_diagonal = List_head(sub_diagonals);
+ diagonal = Univdiag_new_fillin(sub_diagonal->querystart,sub_diagonal->queryend,indexsize,
+ /*univdiagonal*/chroffset + chrstart + sub_diagonal->diagonal);
+ *fillin_diagonals = List_push(*fillin_diagonals,(void *) diagonal);
+ middle_path = List_push(middle_path,(void *) diagonal);
+ } else {
+#ifdef DEBUG13
+ printf("Have %d sub_diagonals\n",List_length(sub_diagonals));
+ for (p = sub_diagonals; p != NULL; p = List_next(p)) {
+ sub_diagonal = (Diag_T) List_head(p);
+ printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend,chrstart + sub_diagonal->diagonal);
+ }
+#endif
+ }
+
+ debug13(printf("***Exiting find_best_path\n"));
+
+ return middle_path;
+}
+
+
+
+/* Note: This GMAP from sarray suffers from relying on middle_path and
+end paths to get stage2. Would be better to run oligoindex_hr to get
+a better stage2, or to run GMAP from GSNAP or pairsearch */
+
+/* Runs GMAP stage 3 on the plus strand, seeded with exact-match pairs
+   built from suffix-array diagonals, and pushes any resulting hit
+   onto gmap.
+
+   gmap:         list of hits accumulated so far.  Returned with at
+                 most one new Stage3end_T pushed onto it.
+   middle_path:  list of Univdiag_T forming the main path.  Must be
+                 non-empty, since its head and the pairs built from it
+                 are dereferenced without a NULL check.
+   start_paths:  list whose elements are lists of Univdiag_T.  Each
+                 inner list is reversed in place and the new head is
+                 stored back into its cell.
+   end_paths:    list whose elements are lists of Univdiag_T.  Not
+                 modified here.
+
+   The remaining arguments are used for coordinate conversion and/or
+   handed to Stage3_compute and Stage3end_new_gmap. */
+static List_T
+run_gmap_plus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_paths,
+               Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+               Chrpos_T chrlength, char *queryuc_ptr, int querylength,
+               int genestrand, bool first_read_p,
+               int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+               Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
+  Stage3end_T hit;
+  List_T stage2pairs, all_stage2_starts, all_stage2_ends;
+  List_T p, q;
+
+  int sensedir;
+
+  struct Pair_T *pairarray;
+  List_T pairs;
+  List_T diagonal_path;
+  Univdiag_T diagonal, prev_diagonal, middle_diagonal;
+  int querypos;
+  Chrpos_T genomepos;
+  int c;
+
+  int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
+    max_match_length, ambig_end_length_5, ambig_end_length_3,
+    unknowns, mismatches, qopens, qindels, topens, tindels,
+    ncanonical, nsemicanonical, nnoncanonical;
+  double ambig_prob_5, ambig_prob_3, min_splice_prob;
+  Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+  Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+  Univcoord_T start, end;
+  int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
+
+
+  /* D. Make all_stage2_starts (paths) */
+  all_stage2_starts = (List_T) NULL;
+  /* Keep the first middle diagonal in its own variable.  The inner
+     loop below reuses "diagonal" as its iterator, so testing against
+     "diagonal" would compare every start path after the first
+     accepted one against the wrong diagonal. */
+  middle_diagonal = (Univdiag_T) List_head(middle_path);
+  for (q = start_paths; q != NULL; q = List_next(q)) {
+    /* Reverse the start path in place and store the new head back */
+    q->first = diagonal_path = List_reverse((List_T) List_head(q));
+    prev_diagonal = (Univdiag_T) List_head(diagonal_path);
+    if (middle_diagonal->univdiagonal > prev_diagonal->univdiagonal) {
+      debug13(printf("START, PLUS\n"));
+      stage2pairs = (List_T) NULL;
+      for (p = diagonal_path; p != NULL; p = List_next(p)) {
+        diagonal = (Univdiag_T) List_head(p);
+        debug13(printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+        querypos = diagonal->querystart;
+        /* Convert universal coordinate to chromosomal coordinate */
+        genomepos = diagonal->univdiagonal + diagonal->querystart - chroffset;
+        while (querypos <= diagonal->queryend) {
+          c = queryuc_ptr[querypos];
+          stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
+                                      /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
+                                      /*dynprogindex*/0);
+          debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
+          querypos++;
+          genomepos++;
+        }
+        debug13(printf("\n"));
+      }
+      all_stage2_starts = List_push(all_stage2_starts,(void *) stage2pairs);
+    }
+  }
+
+
+  /* E. Make all_stage2_ends (pairs) */
+  all_stage2_ends = (List_T) NULL;
+  prev_diagonal = (Univdiag_T) List_last_value(middle_path);
+  for (q = end_paths; q != NULL; q = List_next(q)) {
+    diagonal_path = (List_T) List_head(q);
+    diagonal = (Univdiag_T) List_head(diagonal_path);
+    if (diagonal->univdiagonal > prev_diagonal->univdiagonal) {
+      debug13(printf("END, PLUS\n"));
+      stage2pairs = (List_T) NULL;
+      for (p = diagonal_path; p != NULL; p = List_next(p)) {
+        diagonal = (Univdiag_T) List_head(p);
+        debug13(printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+        querypos = diagonal->querystart;
+        genomepos = diagonal->univdiagonal + diagonal->querystart - chroffset;
+        while (querypos <= diagonal->queryend) {
+          c = queryuc_ptr[querypos];
+          stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
+                                      /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
+                                      /*dynprogindex*/0);
+          debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
+          querypos++;
+          genomepos++;
+        }
+        debug13(printf("\n"));
+      }
+      /* Unlike the start paths, end pairs are stored in reversed order */
+      all_stage2_ends = List_push(all_stage2_ends,(void *) List_reverse(stage2pairs));
+    }
+  }
+
+
+#ifdef DEBUG13
+  printf("MIDDLE DIAGONALS, PLUS\n");
+  for (p = middle_path; p != NULL; p = List_next(p)) {
+    diagonal = (Univdiag_T) List_head(p);
+    printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
+  }
+#endif
+
+  /* F. Make stage2pairs */
+  stage2pairs = (List_T) NULL;
+  for (p = middle_path; p != NULL; p = List_next(p)) {
+    diagonal = (Univdiag_T) List_head(p);
+    querypos = diagonal->querystart;
+    genomepos = diagonal->univdiagonal + diagonal->querystart - chroffset;
+    while (querypos <= diagonal->queryend) {
+      c = queryuc_ptr[querypos];
+      stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
+                                  /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
+                                  /*dynprogindex*/0);
+      debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
+      querypos++;
+      genomepos++;
+    }
+    debug13(printf("\n"));
+  }
+
+
+  /* The head of the pushed list is the last pair pushed; after the
+     reversal the head is the first pair pushed */
+  knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + chroffset;
+  stage2pairs = List_reverse(stage2pairs);
+  knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + chroffset;
+
+  if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
+                                  &matches,&nmatches_posttrim,&max_match_length,
+                                  &ambig_end_length_5,&ambig_end_length_3,
+                                  &ambig_splicetype_5,&ambig_splicetype_3,
+                                  &ambig_prob_5,&ambig_prob_3,
+                                  &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
+                                  &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
+                                  stage2pairs,all_stage2_starts,all_stage2_ends,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+                                  cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+                                  watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+                                  /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+                                  /*query_subseq_offset*/0,
+#else
+                                  /*query_subseq_offset*/0,
+#endif
+                                  chrnum,chroffset,chrhigh,
+                                  knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
+                                  /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+                                  /*sense_try*/0,/*sense_filter*/0,
+                                  oligoindices_minor,diagpool,cellpool)) == NULL) {
+    /* Stage 3 produced no alignment, so nothing is added to gmap */
+
+  } else {
+    nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+                                     pairarray,npairs);
+    /* Extend the aligned region to cover the whole query, bounded by
+       chroffset below and chrhigh above */
+    start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
+                             /*minusterm*/Pair_querypos(&(pairarray[0])),chroffset);
+    end = add_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
+                      /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chrhigh);
+    if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
+                                  ambig_end_length_5,ambig_end_length_3,
+                                  ambig_splicetype_5,ambig_splicetype_3,
+                                  ambig_prob_5,ambig_prob_3,min_splice_prob,
+                                  pairarray,npairs,nsegments,nintrons,nindelbreaks,
+                                  /*left*/start,/*genomiclength*/end - start + 1,
+                                  /*plusp*/true,genestrand,first_read_p,
+                                  /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
+                                  cdna_direction,sensedir,/*sarrayp*/true)) == NULL) {
+      /* No hit was created, so the pair array is released here */
+      FREE_OUT(pairarray);
+    } else {
+      gmap = List_push(gmap,(void *) hit);
+    }
+  }
+
+  List_free(&all_stage2_ends);
+  List_free(&all_stage2_starts);
+
+  return gmap;
+}
+
+
+/* Runs GMAP stage 3 on the minus strand, seeded with exact-match
+   pairs built from suffix-array diagonals, and pushes any resulting
+   hit onto gmap.  Query positions are flipped (querylength - 1 - pos)
+   and genomic positions are measured down from chrhigh.
+
+   gmap:         list of hits accumulated so far.  Returned with at
+                 most one new Stage3end_T pushed onto it.
+   middle_path:  list of Univdiag_T forming the main path.  Must be
+                 non-empty, since its head and the pairs built from it
+                 are dereferenced without a NULL check.  It is
+                 reversed temporarily and restored before returning.
+   start_paths:  list whose elements are lists of Univdiag_T.  Each
+                 inner list is reversed in place and the new head is
+                 stored back into its cell.
+   end_paths:    list whose elements are lists of Univdiag_T.  Not
+                 modified here.
+
+   The remaining arguments are used for coordinate conversion and/or
+   handed to Stage3_compute and Stage3end_new_gmap. */
+static List_T
+run_gmap_minus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_paths,
+                Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+                Chrpos_T chrlength, char *queryuc_ptr, int querylength,
+                int genestrand, bool first_read_p,
+                int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+                Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
+  Stage3end_T hit;
+  List_T stage2pairs, all_stage2_starts, all_stage2_ends;
+
+  List_T p, q;
+
+  int sensedir;
+
+  struct Pair_T *pairarray;
+  List_T pairs;
+  List_T diagonal_path;
+  Univdiag_T diagonal, prev_diagonal, middle_diagonal;
+  int querypos;
+  Chrpos_T genomepos;
+  int c;
+
+  int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
+    max_match_length, ambig_end_length_5, ambig_end_length_3,
+    unknowns, mismatches, qopens, qindels, topens, tindels,
+    ncanonical, nsemicanonical, nnoncanonical;
+  double ambig_prob_5, ambig_prob_3, min_splice_prob;
+  Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+  Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+  Univcoord_T start, end;
+  int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
+
+
+  /* D. Make all_stage2_starts (paths) */
+  all_stage2_starts = (List_T) NULL;
+  /* Keep the first middle diagonal in its own variable.  The inner
+     loop below reuses "diagonal" as its iterator, so testing against
+     "diagonal" would compare every start path after the first
+     accepted one against the wrong diagonal. */
+  middle_diagonal = (Univdiag_T) List_head(middle_path);
+  for (q = start_paths; q != NULL; q = List_next(q)) {
+    /* Reverse the start path in place and store the new head back */
+    q->first = diagonal_path = List_reverse((List_T) List_head(q));
+    prev_diagonal = (Univdiag_T) List_head(diagonal_path);
+    if (middle_diagonal->univdiagonal < prev_diagonal->univdiagonal) {
+      debug13(printf("START, MINUS\n"));
+      stage2pairs = (List_T) NULL;
+      for (p = diagonal_path; p != NULL; p = List_next(p)) {
+        diagonal = (Univdiag_T) List_head(p);
+        debug13(printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+        /* Flip query and genomic coordinates for the minus strand */
+        querypos = querylength - 1 - diagonal->queryend;
+        genomepos = chrhigh - (diagonal->univdiagonal + diagonal->queryend);
+        while (querypos <= querylength - 1 - diagonal->querystart) {
+          c = queryuc_ptr[querypos];
+          stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
+                                      /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
+                                      /*dynprogindex*/0);
+          debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
+          querypos++;
+          genomepos++;
+        }
+        debug13(printf("\n"));
+      }
+      all_stage2_starts = List_push(all_stage2_starts,(void *) stage2pairs);
+    }
+  }
+
+
+  /* E. Make all_stage2_ends (pairs) */
+  all_stage2_ends = (List_T) NULL;
+  prev_diagonal = (Univdiag_T) List_last_value(middle_path);
+  for (q = end_paths; q != NULL; q = List_next(q)) {
+    diagonal_path = (List_T) List_head(q);
+    diagonal = (Univdiag_T) List_head(diagonal_path);
+    if (diagonal->univdiagonal < prev_diagonal->univdiagonal) {
+      debug13(printf("END, MINUS\n"));
+      stage2pairs = (List_T) NULL;
+      for (p = diagonal_path; p != NULL; p = List_next(p)) {
+        diagonal = (Univdiag_T) List_head(p);
+        debug13(printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
+        querypos = querylength - 1 - diagonal->queryend;
+        genomepos = chrhigh - (diagonal->univdiagonal + diagonal->queryend);
+        while (querypos <= querylength - 1 - diagonal->querystart) {
+          c = queryuc_ptr[querypos];
+          stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
+                                      /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
+                                      /*dynprogindex*/0);
+          debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
+          querypos++;
+          genomepos++;
+        }
+        debug13(printf("\n"));
+      }
+      /* Unlike the start paths, end pairs are stored in reversed order */
+      all_stage2_ends = List_push(all_stage2_ends,(void *) List_reverse(stage2pairs));
+    }
+  }
+
+
+#ifdef DEBUG13
+  printf("MIDDLE DIAGONALS, MINUS\n");
+  for (p = middle_path; p != NULL; p = List_next(p)) {
+    diagonal = (Univdiag_T) List_head(p);
+    printf("Diagonal %d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
+  }
+#endif
+
+  /* F. Make stage2pairs */
+  stage2pairs = (List_T) NULL;
+  middle_path = List_reverse(middle_path); /* For minus */
+  for (p = middle_path; p != NULL; p = List_next(p)) {
+    diagonal = (Univdiag_T) List_head(p);
+    querypos = querylength - 1 - diagonal->queryend;
+    assert(chrhigh > diagonal->univdiagonal + diagonal->queryend);
+    genomepos = chrhigh - (diagonal->univdiagonal + diagonal->queryend);
+    while (querypos <= querylength - 1 - diagonal->querystart) {
+      c = queryuc_ptr[querypos];
+      stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
+                                  /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
+                                  /*dynprogindex*/0);
+      debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
+      querypos++;
+      genomepos++;
+    }
+    debug13(printf("\n"));
+  }
+  /* List_reverse relinks the cells in place, so undo the reversal.
+     Otherwise the caller's pointer, which still refers to the
+     original head cell, would be left pointing at the tail of the
+     reversed list. */
+  middle_path = List_reverse(middle_path);
+
+
+  /* Limits are taken in the opposite order from the plus-strand case:
+     the head of the pushed list gives the low limit, and the head
+     after the reversal gives the high limit */
+  knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + chroffset;
+  stage2pairs = List_reverse(stage2pairs);
+  knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + chroffset;
+
+
+  if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
+                                  &matches,&nmatches_posttrim,&max_match_length,
+                                  &ambig_end_length_5,&ambig_end_length_3,
+                                  &ambig_splicetype_5,&ambig_splicetype_3,
+                                  &ambig_prob_5,&ambig_prob_3,
+                                  &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
+                                  &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
+                                  stage2pairs,all_stage2_starts,all_stage2_ends,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+                                  cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+                                  watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+                                  /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+                                  /*query_subseq_offset*/0,
+#else
+                                  /*query_subseq_offset*/0,
+#endif
+                                  chrnum,chroffset,chrhigh,
+                                  knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
+                                  /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+                                  /*sense_try*/0,/*sense_filter*/0,
+                                  oligoindices_minor,diagpool,cellpool)) == NULL) {
+    /* Stage 3 produced no alignment, so nothing is added to gmap */
+
+  } else {
+    nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+                                     pairarray,npairs);
+    /* On the minus strand the first pair gives the high coordinate
+       (start) and the last pair gives the low coordinate (end) */
+    start = add_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
+                        /*plusterm*/Pair_querypos(&(pairarray[0])),chrhigh);
+    end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
+                           /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chroffset);
+    if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
+                                  ambig_end_length_5,ambig_end_length_3,
+                                  ambig_splicetype_5,ambig_splicetype_3,
+                                  ambig_prob_5,ambig_prob_3,min_splice_prob,
+                                  pairarray,npairs,nsegments,nintrons,nindelbreaks,
+                                  /*left*/end,/*genomiclength*/start - end + 1,
+                                  /*plusp*/false,genestrand,first_read_p,
+                                  /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
+                                  cdna_direction,sensedir,/*sarrayp*/true)) == NULL) {
+      /* No hit was created, so the pair array is released here */
+      FREE_OUT(pairarray);
+    } else {
+      gmap = List_push(gmap,(void *) hit);
+    }
+  }
+
+  List_free(&all_stage2_ends);
+  List_free(&all_stage2_starts);
+
+  return gmap;
+}
+
+
+/* Decides between the sense and antisense interpretations of a path.
+
+   An interpretation is rejected if its endpoints are not strictly
+   increasing (starting from a floor of -1) or if any of its junctions
+   is NULL.  Returns false, leaving *sensedir untouched, when both are
+   rejected.  Otherwise returns true and sets *sensedir to the only
+   surviving direction, or, when both survive, to the direction with
+   the larger summed junction probability (AMB_JUNCTION entries are
+   not counted), or to SENSE_NULL on a tie. */
+static bool
+find_sense (int *sensedir, List_T sense_junctions, List_T antisense_junctions,
+            Intlist_T sense_endpoints, Intlist_T antisense_endpoints) {
+  bool sense_ok = true, antisense_ok = true;
+  double sense_total = 0.0, antisense_total = 0.0;
+  Junction_T junction;
+  List_T cell;
+  Intlist_T e;
+  int previous;
+
+  /* Endpoint ordering, sense */
+  previous = -1;
+  e = sense_endpoints;
+  while (e != NULL) {
+    if (previous >= Intlist_head(e)) {
+      sense_ok = false;
+    }
+    previous = Intlist_head(e);
+    e = Intlist_next(e);
+  }
+
+  /* Endpoint ordering, antisense */
+  previous = -1;
+  e = antisense_endpoints;
+  while (e != NULL) {
+    if (previous >= Intlist_head(e)) {
+      antisense_ok = false;
+    }
+    previous = Intlist_head(e);
+    e = Intlist_next(e);
+  }
+
+  /* Junction validity and probability totals, sense */
+  cell = sense_junctions;
+  while (cell != NULL) {
+    junction = (Junction_T) List_head(cell);
+    if (junction == NULL) {
+      sense_ok = false;
+    } else if (Junction_type(junction) != AMB_JUNCTION) {
+      sense_total += Junction_prob(junction);
+    }
+    cell = List_next(cell);
+  }
+
+  /* Junction validity and probability totals, antisense */
+  cell = antisense_junctions;
+  while (cell != NULL) {
+    junction = (Junction_T) List_head(cell);
+    if (junction == NULL) {
+      antisense_ok = false;
+    } else if (Junction_type(junction) != AMB_JUNCTION) {
+      antisense_total += Junction_prob(junction);
+    }
+    cell = List_next(cell);
+  }
+
+  if (!sense_ok) {
+    if (!antisense_ok) {
+      /* Neither direction is usable */
+      return false;
+    }
+    *sensedir = SENSE_ANTI;
+  } else if (!antisense_ok) {
+    *sensedir = SENSE_FORWARD;
+  } else if (sense_total > antisense_total) {
+    *sensedir = SENSE_FORWARD;
+  } else if (antisense_total > sense_total) {
+    *sensedir = SENSE_ANTI;
+  } else {
+    *sensedir = SENSE_NULL;
+  }
+
+  return true;
+}
+
+
+/* Checks one set of endpoints and junctions for consistency.
+
+   Returns false if the endpoints are not strictly increasing
+   (starting from a floor of -1) or if any junction is NULL; returns
+   true otherwise.  *intronp is always written: true if at least one
+   non-NULL junction has type SPLICE_JUNCTION, false otherwise. */
+static bool
+endpoints_acceptable_p (bool *intronp, List_T junctions, Intlist_T endpoints) {
+  bool ok = true;
+  Junction_T j;
+  List_T cell;
+  Intlist_T e;
+  int previous = -1;
+
+  e = endpoints;
+  while (e != NULL) {
+    if (previous >= Intlist_head(e)) {
+      ok = false;
+    }
+    previous = Intlist_head(e);
+    e = Intlist_next(e);
+  }
+
+  *intronp = false;
+  cell = junctions;
+  while (cell != NULL) {
+    j = (Junction_T) List_head(cell);
+    if (j == NULL) {
+      ok = false;
+    } else if (Junction_type(j) == SPLICE_JUNCTION) {
+      *intronp = true;
+    }
+    cell = List_next(cell);
+  }
+
+  return ok;
+}
+
+
+
+#if 0
+/* Disabled by the surrounding "#if 0".  Reports whether middle_path
+   leaves more than 8 query positions uncovered at either end: true if
+   the first diagonal starts after position 8 or the last diagonal
+   ends before querylength - 8. */
+static bool
+incomplete_result_p (List_T middle_path, int querylength) {
+  Univdiag_T first_diagonal, last_diagonal;
+
+  first_diagonal = (Univdiag_T) List_head(middle_path);
+  last_diagonal = (Univdiag_T) List_last_value(middle_path);
+
+  if (first_diagonal->querystart > 8) {
+    return true;
+  }
+  if (last_diagonal->queryend < querylength - 8) {
+    return true;
+  }
+  return false;
+}
+#endif
+
+
+/* Always solves against plus strand of genome. Just provide either
+ queryuc/query_compress_fwd (coords measured from beginning of
+ sequence) or queryrc/query_compress_rev (coords measured from end
+ of sequence). All coordinates measured from low end.
+ Sense/antisense is with respect to the plus strand. But to
+ interface with Stage3end_new_substring command, need to flip
+ coordinates for case where queryrc aligns to plus strand. */
+
+static List_T
+solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middle_path,
+
+ Intlist_T right_endpoints_sense, Intlist_T right_endpoints_antisense,
+ Intlist_T right_queryends_sense, Intlist_T right_queryends_antisense,
+ Uintlist_T right_ambcoords_sense, Uintlist_T right_ambcoords_antisense,
+ Intlist_T right_amb_knowni_sense, Intlist_T right_amb_knowni_antisense,
+ Intlist_T right_amb_nmismatchesi_sense, Intlist_T right_amb_nmismatchesi_antisense,
+ Intlist_T right_amb_nmismatchesj_sense, Intlist_T right_amb_nmismatchesj_antisense,
+ Doublelist_T right_amb_probsi_sense, Doublelist_T right_amb_probsi_antisense,
+ Doublelist_T right_amb_probsj_sense, Doublelist_T right_amb_probsj_antisense,
+
+ Intlist_T left_endpoints_sense, Intlist_T left_endpoints_antisense,
+ Intlist_T left_querystarts_sense, Intlist_T left_querystarts_antisense,
+ Uintlist_T left_ambcoords_sense, Uintlist_T left_ambcoords_antisense,
+ Intlist_T left_amb_knowni_sense, Intlist_T left_amb_knowni_antisense,
+ Intlist_T left_amb_nmismatchesi_sense, Intlist_T left_amb_nmismatchesi_antisense,
+ Intlist_T left_amb_nmismatchesj_sense, Intlist_T left_amb_nmismatchesj_antisense,
+ Doublelist_T left_amb_probsi_sense, Doublelist_T left_amb_probsi_antisense,
+ Doublelist_T left_amb_probsj_sense, Doublelist_T left_amb_probsj_antisense,
+
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ Chrpos_T chrlength, int querylength, Compress_T query_compress,
+ bool plusp, int genestrand, bool first_read_p) {
+ List_T super_path, ambig_path;
+ Stage3end_T hit;
+ int sensedir, sense_sensedir, antisense_sensedir;
+ List_T substrings = NULL;
+
+ List_T p;
+ Univdiag_T diagonal, prev_diagonal, new_diagonal;
+ Chrpos_T splice_distance;
+ int querystart_for_merge, querystart, queryend, ignore;
+ int max_leftward, skip_left;
+ int nmismatches, max_mismatches_allowed;
+ bool fillin_p;
+
+ Junction_T junction;
+ int indel_pos;
+ int nindels;
+ Univcoord_T deletionpos;
+
+ int splice_pos;
+ double donor_prob, acceptor_prob;
+
+ bool sense_acceptable_p, antisense_acceptable_p, sense_intronp, antisense_intronp;
+ Univcoord_T left, prev_left;
+ Uintlist_T sense_lefts = NULL, antisense_lefts = NULL, q;
+ Intlist_T sense_nmismatches = NULL, antisense_nmismatches = NULL, x;
+ Intlist_T sense_endpoints = NULL, antisense_endpoints = NULL, r;
+ List_T sense_junctions = NULL, antisense_junctions = NULL;
+ Substring_T substring;
+
+ int best_knowni_i, best_knowni_j, best_nmismatches_i, best_nmismatches_j;
+ double best_prob_i, best_prob_j;
+
+ Substring_T right_ambig_sense, right_ambig_antisense,
+ left_ambig_sense, left_ambig_antisense;
+ int segmenti_donor_nknown, segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
+ int i, j;
+
+#ifdef HAVE_ALLOCA
+ int *segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+ int *segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
+#else
+ int segmenti_donor_knownpos[MAX_READLENGTH+1], segmentj_acceptor_knownpos[MAX_READLENGTH+1],
+ segmentj_antidonor_knownpos[MAX_READLENGTH+1], segmenti_antiacceptor_knownpos[MAX_READLENGTH+1];
+ int segmenti_donor_knowni[MAX_READLENGTH+1], segmentj_acceptor_knowni[MAX_READLENGTH+1],
+ segmentj_antidonor_knowni[MAX_READLENGTH+1], segmenti_antiacceptor_knowni[MAX_READLENGTH+1];
+#endif
+
+
+#ifdef DEBUG13
+ printf("\n");
+ printf("Original diagonals:\n");
+ for (p = middle_path; p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
+ }
+ printf("\n");
+#endif
+
+ /* Step 1: Handle mismatches */
+ *completep = false;
+ super_path = (List_T) NULL;
+
+ p = middle_path;
+ prev_diagonal = (Univdiag_T) List_head(p);
+ querystart_for_merge = prev_diagonal->querystart;
+ prev_left = prev_diagonal->univdiagonal;
+ nmismatches = 0;
+ fillin_p = false;
+
+ for (p = List_next(p); p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ if ((left = diagonal->univdiagonal) == prev_left) {
+ /* Mismatch */
+ nmismatches += (diagonal->querystart - prev_diagonal->queryend - 1); /* This could be an overestimate */
+ debug13(printf("We have mismatch or mismatches between %d..%d and %d..%d. Incrementing mismatches by %d => %d\n",
+ prev_diagonal->querystart,prev_diagonal->queryend,diagonal->querystart,diagonal->queryend,
+ (diagonal->querystart - prev_diagonal->queryend - 1),nmismatches));
+ if (diagonal->nmismatches_known_p == false) {
+ fillin_p = true;
+ }
+
+ } else {
+ /* Indel or splice */
+
+ /* Handle previous segment (for prev_left) */
+ new_diagonal = Univdiag_new(querystart_for_merge,prev_diagonal->queryend,prev_diagonal->univdiagonal);
+ if (fillin_p == true || prev_diagonal->nmismatches_known_p == false) {
+ new_diagonal->intscore = 100; /* Positive score allows for many mismatches in indel/splice routines */
+ } else {
+ new_diagonal->intscore = nmismatches;
+ }
+ super_path = List_push(super_path,(void *) new_diagonal);
+
+ prev_left = left;
+ querystart_for_merge = diagonal->querystart;
+ nmismatches = 0;
+ fillin_p = false;
+ }
+
+ prev_diagonal = diagonal;
+ }
+
+ new_diagonal = Univdiag_new(querystart_for_merge,prev_diagonal->queryend,prev_diagonal->univdiagonal);
+ if (fillin_p == true || prev_diagonal->nmismatches_known_p == false) {
+ new_diagonal->intscore = 100; /* Positive score allows for many mismatches in indel/splice routines */
+ } else {
+ new_diagonal->intscore = nmismatches;
+ }
+ super_path = List_push(super_path,(void *) new_diagonal);
+
+ super_path = List_reverse(super_path);
+
+#ifdef DEBUG13
+ printf("\n");
+ printf("Super diagonals on chrnum %d:\n",chrnum);
+ for (p = super_path; p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ printf("%d..%d at %u with %d mismatches\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->intscore);
+ }
+ printf("\n");
+#endif
+
+
+ /* Step 2: Handle indels and splices */
+
+ p = super_path;
+ prev_diagonal = (Univdiag_T) List_head(p);
+ prev_left = prev_diagonal->univdiagonal;
+
+ debug13(printf("left %u for diagonal %d..%d\n",prev_left,prev_diagonal->querystart,prev_diagonal->queryend));
+
+ sense_endpoints = Intlist_push(NULL,prev_diagonal->querystart);
+ antisense_endpoints = Intlist_push(NULL,prev_diagonal->querystart);
+
+ /* Previously pushed prev_diagonal->intscore, but that is not
+ correct. Pushing -1 indicates that we need to compute the
+ value */
+ sense_nmismatches = Intlist_push(NULL,-1);
+ antisense_nmismatches = Intlist_push(NULL,-1);
+
+ for (p = List_next(p); p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ left = diagonal->univdiagonal;
+ assert(left != prev_left); /* Because we already handled mismatches above */
+
+ debug13(printf("Diagonal %d..%d at leftpos %u, diff %d\n",
+ diagonal->querystart,diagonal->queryend,left,left - prev_left));
+
+ if (left < prev_left) {
+ /* Insertion */
+ nindels = prev_left - left;
+ max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
+ debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
+ if (prev_diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+ if (diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+ if ((indel_pos = Indel_resolve_middle_insertion(&best_nmismatches_i,&best_nmismatches_j,
+ /*left*/prev_left,/*indels*/+nindels,query_compress,
+ prev_diagonal->querystart,diagonal->queryend,querylength,
+ max_mismatches_allowed,/*plusp:true*/true,genestrand,first_read_p)) < 0) {
+ sense_junctions = List_push(sense_junctions,NULL);
+ antisense_junctions = List_push(antisense_junctions,NULL);
+ } else {
+ sense_junctions = List_push(sense_junctions,Junction_new_insertion(nindels));
+ antisense_junctions = List_push(antisense_junctions,Junction_new_insertion(nindels));
+ }
+
+ sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_i);
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
+
+ antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_i);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
+
+ sense_lefts = Uintlist_push(sense_lefts,prev_left);
+ antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
+
+ sense_endpoints = Intlist_push(sense_endpoints,indel_pos);
+ antisense_endpoints = Intlist_push(antisense_endpoints,indel_pos);
+ debug13(printf("insertion pos in range %d..%d is %d\n",prev_diagonal->querystart,diagonal->queryend,indel_pos));
+
+ } else if (left <= prev_left + max_deletionlen) {
+ /* Deletion */
+ nindels = left - prev_left;
+ max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
+ debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
+ if (prev_diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+ if (diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+ if ((indel_pos = Indel_resolve_middle_deletion(&best_nmismatches_i,&best_nmismatches_j,
+ /*left*/prev_left,/*indels*/-nindels,query_compress,
+ prev_diagonal->querystart,diagonal->queryend,querylength,
+ max_mismatches_allowed,/*plusp:true*/true,genestrand,first_read_p)) < 0) {
+ sense_junctions = List_push(sense_junctions,NULL);
+ antisense_junctions = List_push(antisense_junctions,NULL);
+ } else {
+ deletionpos = prev_left + indel_pos;
+ sense_junctions = List_push(sense_junctions,Junction_new_deletion(nindels,deletionpos));
+ antisense_junctions = List_push(antisense_junctions,Junction_new_deletion(nindels,deletionpos));
+ }
+
+ sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_i);
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
+
+ antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_i);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
+
+ sense_lefts = Uintlist_push(sense_lefts,prev_left);
+ antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
+
+ sense_endpoints = Intlist_push(sense_endpoints,indel_pos);
+ antisense_endpoints = Intlist_push(antisense_endpoints,indel_pos);
+ debug13(printf("deletion pos in range %d..%d is %d\n",prev_diagonal->querystart,diagonal->queryend,indel_pos));
+
+ } else {
+ /* Splice */
+ segmenti_donor_nknown = segmenti_antiacceptor_nknown = 0;
+ if (nsplicesites > 0 &&
+ Splicetrie_splicesite_p(prev_left,/*pos5*/1,/*pos3*/querylength) == true) {
+ j = binary_search(0,nsplicesites,splicesites,prev_left);
+ while (j < nsplicesites && splicesites[j] < prev_left + querylength) {
+ if (splicetypes[j] == DONOR) {
+ debug4s(printf("Setting known donor %d for segmenti at %u\n",j,splicesites[j]));
+ segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - prev_left;
+ segmenti_donor_knowni[segmenti_donor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIACCEPTOR) {
+ debug4s(printf("Setting known antiacceptor %d for segmenti at %u\n",j,splicesites[j]));
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - prev_left;
+ segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
+ }
+ j++;
+ }
+ }
+ segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
+
+ segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
+ if (nsplicesites > 0 &&
+ Splicetrie_splicesite_p(left,/*pos5*/1,/*pos3*/querylength) == true) {
+ j = binary_search(0,nsplicesites,splicesites,left);
+ while (j < nsplicesites && splicesites[j] < left + querylength) {
+ if (splicetypes[j] == ACCEPTOR) {
+ debug4s(printf("Setting known acceptor %d for segmentj at %u\n",j,splicesites[j]));
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - left;
+ segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIDONOR) {
+ debug4s(printf("Setting known antidonor %d for segmentj at %u\n",j,splicesites[j]));
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - left;
+ segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
+ }
+ j++;
+ }
+ }
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
+
+ splice_distance = left - prev_left;
+ max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
+ debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
+ if (prev_diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+ if (diagonal->intscore > 0) {
+ max_mismatches_allowed += 1;
+ }
+
+ if ((splice_pos = Splice_resolve_sense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
+ &best_prob_i,&best_prob_j,
+ /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
+ prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ plusp,genestrand,first_read_p)) < 0) {
+ sense_endpoints = Intlist_push(sense_endpoints,-1); /* Mark as invalid */
+ sense_junctions = List_push(sense_junctions,NULL);
+ } else if (plusp == true) {
+ sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
+ /*donor_prob*/best_prob_i,/*acceptor_prob*/best_prob_j));
+ } else {
+ sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
+ /*donor_prob*/best_prob_j,/*acceptor_prob*/best_prob_i));
+ }
+ debug13(printf("sense splice_pos in range %d..%d is %d\n",prev_diagonal->querystart,diagonal->queryend,splice_pos));
+ sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_i);
+ sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
+ sense_lefts = Uintlist_push(sense_lefts,prev_left);
+
+ if ((splice_pos = Splice_resolve_antisense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
+ &best_prob_i,&best_prob_j,
+ /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
+ prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ plusp,genestrand,first_read_p)) < 0) {
+ antisense_endpoints = Intlist_push(antisense_endpoints,-1); /* Mark as invalid */
+ antisense_junctions = List_push(antisense_junctions,NULL);
+ } else if (plusp == true) {
+ antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
+ /*donor_prob*/best_prob_j,/*acceptor_prob*/best_prob_i));
+ } else {
+ antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
+ /*donor_prob*/best_prob_i,/*acceptor_prob*/best_prob_j));
+ }
+ debug13(printf("antisense splice_pos in range %d..%d is %d\n",prev_diagonal->querystart,diagonal->queryend,splice_pos));
+ antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_i);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
+ antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
+ }
+
+ /* Handle previous segment (for prev_left) */
+ prev_left = left;
+ prev_diagonal = diagonal;
+ }
+
+ /* Finish up lists */
+ sense_lefts = Uintlist_push(sense_lefts,prev_left);
+ antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
+ sense_endpoints = Intlist_push(sense_endpoints,prev_diagonal->queryend + 1);
+ antisense_endpoints = Intlist_push(antisense_endpoints,prev_diagonal->queryend + 1);
+
+
+ debug13(printf("After step 2\n"));
+ debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
+ debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
+ debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
+ debug13(printf("antisense nmismatches: %s\n",Intlist_to_string(antisense_nmismatches)));
+
+
+ /* Step 3: Handle ambiguous ends on right */
+ right_ambig_sense = (Substring_T) NULL;
+ if (right_endpoints_sense == NULL) {
+ /* Skip */
+ } else if (Intlist_length(right_endpoints_sense) == 1) {
+ /* Only one splice on right */
+ splice_pos = Intlist_head(right_endpoints_sense);
+ queryend = Intlist_head(right_queryends_sense);
+ left = Uintlist_head(right_ambcoords_sense) - splice_pos;
+ splice_distance = left - prev_left;
+ if (plusp == true) {
+ donor_prob = Doublelist_head(right_amb_probsi_sense);
+ acceptor_prob = Doublelist_head(right_amb_probsj_sense);
+ } else {
+ acceptor_prob = Doublelist_head(right_amb_probsi_sense);
+ donor_prob = Doublelist_head(right_amb_probsj_sense);
+ }
+
+ sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
+ sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(right_amb_nmismatchesi_sense));
+ sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(right_amb_nmismatchesj_sense));
+ sense_lefts = Uintlist_push(sense_lefts,left);
+
+ sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
+ sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ sense_endpoints = Intlist_push(sense_endpoints,queryend);
+ sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
+ donor_prob,acceptor_prob));
+
+ } else if (Intlist_vary(right_endpoints_sense) == true) {
+ /* Skip */
+ } else {
+ /* Ambiguous substring on right */
+ splice_pos = Intlist_head(right_endpoints_sense);
+ queryend = Intlist_head(right_queryends_sense); /* Should all be the same */
+
+ sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
+ sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ /* sense_endpoints = Intlist_push(sense_endpoints,queryend); */
+
+ if (plusp == true) {
+ right_ambig_sense = Substring_new_ambig(/*querystart*/splice_pos,queryend,
+ /*splice_pos*/splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,
+ /*genomiclength*/querylength,plusp,genestrand,first_read_p,
+ right_ambcoords_sense,right_amb_knowni_sense,
+ right_amb_nmismatchesj_sense,right_amb_probsj_sense,
+ /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense),
+ /*amb_donor_common_p*/true,/*substring1p*/false);
+ } else {
+ right_ambig_sense = Substring_new_ambig(/*querystart*/querylength - queryend,querylength - splice_pos,
+ /*splice_pos*/querylength - splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,
+ /*genomiclength*/querylength,plusp,genestrand,first_read_p,
+ right_ambcoords_sense,right_amb_knowni_sense,
+ right_amb_nmismatchesj_sense,right_amb_probsj_sense,
+ /*amb_common_prob*/Doublelist_head(right_amb_probsi_sense),
+ /*amb_donor_common_p*/false,/*substring1p*/false);
+ }
+ }
+
+ if (right_ambig_sense != NULL) {
+ /* Endpoints end before ambiguous substring */
+ } else if (Intlist_head(sense_endpoints) == querylength) {
+ /* Last substring already goes to the end */
+ } else {
+ sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
+ sense_endpoints = Intlist_push(sense_endpoints,querylength);
+
+ sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
+ sense_nmismatches = Intlist_push(sense_nmismatches,-1); /* Recalculate */
+ }
+
+
+ right_ambig_antisense = (Substring_T) NULL;
+ if (right_endpoints_antisense == NULL) {
+ /* Skip */
+ } else if (Intlist_length(right_endpoints_antisense) == 1) {
+ /* Only one splice on right */
+ splice_pos = Intlist_head(right_endpoints_antisense);
+ queryend = Intlist_head(right_queryends_antisense);
+ left = Uintlist_head(right_ambcoords_antisense) - splice_pos;
+ splice_distance = left - prev_left;
+ if (plusp == true) {
+ acceptor_prob = Doublelist_head(right_amb_probsi_antisense);
+ donor_prob = Doublelist_head(right_amb_probsj_antisense);
+ } else {
+ donor_prob = Doublelist_head(right_amb_probsi_antisense);
+ acceptor_prob = Doublelist_head(right_amb_probsj_antisense);
+ }
+
+ antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(right_amb_nmismatchesi_antisense));
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(right_amb_nmismatchesj_antisense));
+ antisense_lefts = Uintlist_push(antisense_lefts,left);
+
+ antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
+ antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ antisense_endpoints = Intlist_push(antisense_endpoints,queryend);
+ antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
+ donor_prob,acceptor_prob));
+
+ } else if (Intlist_vary(right_endpoints_antisense) == true) {
+ /* Skip */
+ } else {
+ /* Ambiguous substring on right */
+ splice_pos = Intlist_head(right_endpoints_antisense);
+ queryend = Intlist_head(right_queryends_antisense); /* Should all be the same */
+
+ antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
+ antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ /* antisense_endpoints = Intlist_push(antisense_endpoints,queryend); */
+
+ if (plusp == true) {
+ right_ambig_antisense = Substring_new_ambig(/*querystart*/splice_pos,queryend,
+ /*splice_pos*/splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,
+ /*genomiclength*/querylength,plusp,genestrand,first_read_p,
+ right_ambcoords_antisense,right_amb_knowni_antisense,
+ right_amb_nmismatchesj_antisense,right_amb_probsj_antisense,
+ /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense),
+ /*amb_donor_common_p*/false,/*substring1p*/false);
+ } else {
+ right_ambig_antisense = Substring_new_ambig(/*querystart*/querylength - queryend,querylength - splice_pos,
+ /*splice_pos*/querylength - splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,
+ /*genomiclength*/querylength,plusp,genestrand,first_read_p,
+ right_ambcoords_antisense,right_amb_knowni_antisense,
+ right_amb_nmismatchesj_antisense,right_amb_probsj_antisense,
+ /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense),
+ /*amb_donor_common_p*/true,/*substring1p*/false);
+ }
+ }
- /* See if we have a substitution winner */
- Elt_fill_positions_all(best_plus_elt,plus_sarray);
- for (i = 0; i < best_plus_elt->npositions; i++) {
- left = best_plus_elt->positions[i];
- /* Should return max_mismatches + 1 if it exceeds the limit */
- if ((nmismatches = Genome_count_mismatches_limit(query_compress_fwd,left,/*pos5*/0,/*pos3*/querylength,
- /*max_mismatches*/nmisses_allowed,
- /*plusp*/true,genestrand,first_read_p)) <= nmisses_allowed) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- debug(printf("2. Reporting hit with %d mismatches vs %d allowed\n",nmismatches,nmisses_allowed));
- if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
- left,/*genomiclength*/querylength,
- query_compress_fwd,/*plusp*/true,genestrand,first_read_p,
- chrnum,chroffset,chrhigh,chrlength,
- /*sarrayp*/true)) != NULL) {
- *subs = List_push(*subs,(void *) hit);
- }
+ if (right_ambig_antisense != NULL) {
+ /* Endpoints end before ambiguous substring */
+ } else if (Intlist_head(antisense_endpoints) == querylength) {
+ /* Last substring already goes to the end */
+ } else {
+ antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
+ antisense_endpoints = Intlist_push(antisense_endpoints,querylength);
+
+ antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,-1); /* Recalculate */
+ }
+
+
+ debug13(printf("After step 3\n"));
+ debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
+ debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
+ debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
+ debug13(printf("antisense nmismatches: %s\n",Intlist_to_string(antisense_nmismatches)));
+
+ /* Step 4: Reverse sense and antisense alignments */
+ sense_nmismatches = Intlist_reverse(sense_nmismatches);
+ antisense_nmismatches = Intlist_reverse(antisense_nmismatches);
+ sense_lefts = Uintlist_reverse(sense_lefts);
+ antisense_lefts = Uintlist_reverse(antisense_lefts);
+ sense_endpoints = Intlist_reverse(sense_endpoints);
+ antisense_endpoints = Intlist_reverse(antisense_endpoints);
+ sense_junctions = List_reverse(sense_junctions);
+ antisense_junctions = List_reverse(antisense_junctions);
+
+ debug13(printf("After step 4\n"));
+ debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
+ debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
+ debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
+ debug13(printf("antisense nmismatches: %s\n",Intlist_to_string(antisense_nmismatches)));
+
+
+ /* Step 5: Handle ambiguous ends on left */
+ left_ambig_sense = (Substring_T) NULL;
+ if (left_endpoints_sense == NULL) {
+ /* Skip, but extend leftward */
+ if (Intlist_head(sense_endpoints) > 0) {
+ sense_endpoints = Intlist_pop(sense_endpoints,&querystart);
+ if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
+ /*pos5*/0,/*pos3*/querystart,plusp,genestrand,first_read_p)) > 0) {
+ sense_endpoints = Intlist_push(sense_endpoints,querystart - max_leftward);
+ } else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
+ /*pos5*/0,/*pos3*/querystart-1,plusp,genestrand,first_read_p)) > 0) {
+ sense_endpoints = Intlist_push(sense_endpoints,querystart - max_leftward - 1);
+ } else {
+ sense_endpoints = Intlist_push(sense_endpoints,querystart);
+ }
+ }
+
+ } else if (Intlist_length(left_endpoints_sense) == 1) {
+ /* Only one splice on left */
+ prev_left = Uintlist_head(sense_lefts);
+ splice_pos = Intlist_head(left_endpoints_sense);
+ querystart = Intlist_head(left_querystarts_sense);
+ left = Uintlist_head(left_ambcoords_sense) - splice_pos;
+ splice_distance = prev_left - left;
+ if (plusp == true) {
+ donor_prob = Doublelist_head(left_amb_probsi_sense);
+ acceptor_prob = Doublelist_head(left_amb_probsj_sense);
+ } else {
+ acceptor_prob = Doublelist_head(left_amb_probsi_sense);
+ donor_prob = Doublelist_head(left_amb_probsj_sense);
+ }
+
+ sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
+ sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(left_amb_nmismatchesi_sense));
+ sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(left_amb_nmismatchesj_sense));
+ sense_lefts = Uintlist_push(sense_lefts,left);
+
+ sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
+ sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ sense_endpoints = Intlist_push(sense_endpoints,querystart);
+ sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
+ donor_prob,acceptor_prob));
+
+ } else if (Intlist_vary(left_endpoints_sense) == true) {
+ /* Skip, but extend leftward */
+ sense_endpoints = Intlist_pop(sense_endpoints,&querystart);
+ if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
+ /*pos5*/0,/*pos3*/querystart,plusp,genestrand,first_read_p)) > 0) {
+ sense_endpoints = Intlist_push(sense_endpoints,querystart - max_leftward);
+ } else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
+ /*pos5*/0,/*pos3*/querystart-1,plusp,genestrand,first_read_p)) > 0) {
+ sense_endpoints = Intlist_push(sense_endpoints,querystart - max_leftward - 1);
+ } else {
+ sense_endpoints = Intlist_push(sense_endpoints,querystart);
+ }
+
+ } else {
+ /* Ambiguous substring on left */
+ splice_pos = Intlist_head(left_endpoints_sense);
+ querystart = Intlist_head(left_querystarts_sense); /* Should all be the same */
+
+ sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
+ sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
+ /* sense_endpoints = Intlist_push(sense_endpoints,querystart); */
+
+ if (plusp == true) {
+ left_ambig_sense = Substring_new_ambig(querystart,/*queryend*/splice_pos,
+ /*splice_pos*/splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,
+ /*genomiclength*/querylength,plusp,genestrand,first_read_p,
+ left_ambcoords_sense,left_amb_knowni_sense,
+ left_amb_nmismatchesi_sense,left_amb_probsi_sense,
+ /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense),
+ /*amb_donor_common_p*/false,/*substring1p*/true);
+ } else {
+ left_ambig_sense = Substring_new_ambig(querylength - splice_pos,/*queryend*/querylength - querystart,
+ /*splice_pos*/querylength - splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,
+ /*genomiclength*/querylength,plusp,genestrand,first_read_p,
+ left_ambcoords_sense,left_amb_knowni_sense,
+ left_amb_nmismatchesi_sense,left_amb_probsi_sense,
+ /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense),
+ /*amb_donor_common_p*/true,/*substring1p*/true);
+ }
+ }
+
+ if (left_ambig_sense != NULL) {
+ /* Endpoints begin after ambiguous substring */
+ } else if (Intlist_head(sense_endpoints) == 0) {
+ /* First substring already goes to the beginning */
+ } else {
+ sense_endpoints = Intlist_pop(sense_endpoints,&ignore);
+ sense_endpoints = Intlist_push(sense_endpoints,0);
+
+ sense_nmismatches = Intlist_pop(sense_nmismatches,&ignore);
+ sense_nmismatches = Intlist_push(sense_nmismatches,-1); /* Recalculate */
+ }
+
+
+ left_ambig_antisense = (Substring_T) NULL;
+ if (left_endpoints_antisense == NULL) {
+ /* Skip, but extend leftward */
+ if (Intlist_head(antisense_endpoints) > 0) {
+ antisense_endpoints = Intlist_pop(antisense_endpoints,&querystart);
+ if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
+ /*pos5*/0,/*pos3*/querystart,plusp,genestrand,first_read_p)) > 0) {
+ antisense_endpoints = Intlist_push(antisense_endpoints,querystart - max_leftward);
+ } else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
+ /*pos5*/0,/*pos3*/querystart-1,plusp,genestrand,first_read_p)) > 0) {
+ antisense_endpoints = Intlist_push(antisense_endpoints,querystart - max_leftward - 1);
+ } else {
+ antisense_endpoints = Intlist_push(antisense_endpoints,querystart);
+ }
+ }
+
+ } else if (Intlist_length(left_endpoints_antisense) == 1) {
+ /* Only one splice on left */
+ prev_left = Uintlist_head(antisense_lefts);
+ splice_pos = Intlist_head(left_endpoints_antisense);
+ querystart = Intlist_head(left_querystarts_antisense);
+ left = Uintlist_head(left_ambcoords_antisense) - splice_pos;
+ splice_distance = prev_left - left;
+ if (plusp == true) {
+ acceptor_prob = Doublelist_head(left_amb_probsi_antisense);
+ donor_prob = Doublelist_head(left_amb_probsj_antisense);
+ } else {
+ donor_prob = Doublelist_head(left_amb_probsi_antisense);
+ acceptor_prob = Doublelist_head(left_amb_probsj_antisense);
+ }
+
+ antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(left_amb_nmismatchesi_antisense));
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(left_amb_nmismatchesj_antisense));
+ antisense_lefts = Uintlist_push(antisense_lefts,left);
+
+ antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
+ antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ antisense_endpoints = Intlist_push(antisense_endpoints,querystart);
+ antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
+ donor_prob,acceptor_prob));
+
+ } else if (Intlist_vary(left_endpoints_antisense) == true) {
+ /* Skip, but extend leftward */
+ antisense_endpoints = Intlist_pop(antisense_endpoints,&querystart);
+ if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
+ /*pos5*/0,/*pos3*/querystart,plusp,genestrand,first_read_p)) > 0) {
+ antisense_endpoints = Intlist_push(antisense_endpoints,querystart - max_leftward);
+ } else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
+ /*pos5*/0,/*pos3*/querystart-1,plusp,genestrand,first_read_p)) > 0) {
+ antisense_endpoints = Intlist_push(antisense_endpoints,querystart - max_leftward - 1);
+ } else {
+ antisense_endpoints = Intlist_push(antisense_endpoints,querystart);
+ }
+
+ } else {
+ /* Ambiguous substring on left */
+ splice_pos = Intlist_head(left_endpoints_antisense);
+ querystart = Intlist_head(left_querystarts_antisense); /* Should all be the same */
+
+ antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
+ antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
+ /* antisense_endpoints = Intlist_push(antisense_endpoints,querystart); */
+
+ if (plusp == true) {
+ left_ambig_antisense = Substring_new_ambig(querystart,/*queryend*/splice_pos,
+ /*splice_pos*/splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,
+ /*genomiclength*/querylength,plusp,genestrand,first_read_p,
+ left_ambcoords_antisense,left_amb_knowni_antisense,
+ left_amb_nmismatchesi_antisense,left_amb_probsi_antisense,
+ /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense),
+ /*amb_donor_common_p*/true,/*substring1p*/true);
+ } else {
+ left_ambig_antisense = Substring_new_ambig(querylength - splice_pos,/*queryend*/querylength - querystart,
+ /*splice_pos*/querylength - splice_pos,querylength,
+ chrnum,chroffset,chrhigh,chrlength,
+ /*genomiclength*/querylength,plusp,genestrand,first_read_p,
+ left_ambcoords_antisense,left_amb_knowni_antisense,
+ left_amb_nmismatchesi_antisense,left_amb_probsi_antisense,
+ /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense),
+ /*amb_donor_common_p*/false,/*substring1p*/true);
+ }
+ }
+
+ if (left_ambig_antisense != NULL) {
+ /* Endpoints begin after ambiguous substring */
+ } else if (Intlist_head(antisense_endpoints) == 0) {
+ /* First substring already goes to the beginning */
+ } else {
+ antisense_endpoints = Intlist_pop(antisense_endpoints,&ignore);
+ antisense_endpoints = Intlist_push(antisense_endpoints,0);
+
+ antisense_nmismatches = Intlist_pop(antisense_nmismatches,&ignore);
+ antisense_nmismatches = Intlist_push(antisense_nmismatches,-1); /* Recalculate */
+ }
+
+
+ debug13(printf("After step 5\n"));
+ debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
+ debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
+ debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
+ debug13(printf("antisense nmismatches: %s\n",Intlist_to_string(antisense_nmismatches)));
+
+#ifdef DEBUG13
+ printf("Sense junctions\n");
+ for (p = sense_junctions; p != NULL; p = List_next(p)) {
+ Junction_print(List_head(p));
+ }
+ printf("\n");
+ printf("Antisense junctions\n");
+ for (p = antisense_junctions; p != NULL; p = List_next(p)) {
+ Junction_print(List_head(p));
+ }
+ printf("\n");
+#endif
+
+
+ /* Need to rely on probability filtering in splice.c to get correct
+ results for sense and antisense */
+ sense_acceptable_p = endpoints_acceptable_p(&sense_intronp,sense_junctions,sense_endpoints);
+ antisense_acceptable_p = endpoints_acceptable_p(&antisense_intronp,antisense_junctions,
+ antisense_endpoints);
+ if (sense_acceptable_p == true && antisense_acceptable_p == true) {
+ if (sense_intronp == true || right_ambig_sense != NULL || left_ambig_sense != NULL) {
+ sense_sensedir = SENSE_FORWARD;
+ } else {
+ sense_sensedir = SENSE_NULL;
+ }
+ if (antisense_intronp == true || right_ambig_antisense != NULL || left_ambig_antisense != NULL) {
+ antisense_sensedir = SENSE_ANTI;
+ } else {
+ antisense_sensedir = SENSE_NULL;
+ }
+
+ if (sense_sensedir == SENSE_NULL && antisense_sensedir == SENSE_NULL) {
+ /* Create just one hit */
+ if ((hit = Stage3end_new_substrings(&(*found_score),sense_endpoints,sense_lefts,
+ sense_nmismatches,sense_junctions,querylength,query_compress,
+ /*right_ambig*/NULL,/*left_ambig*/NULL,plusp,genestrand,/*sensedir*/SENSE_NULL,
+ first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
+ Substring_free(&right_ambig_sense);
+ Substring_free(&left_ambig_sense);
+ Junction_gc(&sense_junctions);
+ Substring_free(&right_ambig_antisense);
+ Substring_free(&left_ambig_antisense);
+ } else {
+ if (Stage3end_substrings_querystart(hit) < 8 &&
+ Stage3end_substrings_queryend(hit) >= querylength - 8) {
+ *completep = true;
+ }
+ hits = List_push(hits,(void *) hit);
+ }
+ Junction_gc(&antisense_junctions);
+
+ } else {
+ /* Create both sense and antisense hits */
+ if ((hit = Stage3end_new_substrings(&(*found_score),sense_endpoints,sense_lefts,
+ sense_nmismatches,sense_junctions,querylength,query_compress,
+ right_ambig_sense,left_ambig_sense,plusp,genestrand,sense_sensedir,
+ first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
+ Substring_free(&right_ambig_sense);
+ Substring_free(&left_ambig_sense);
+ Junction_gc(&sense_junctions);
+ } else {
+ if (Stage3end_substrings_querystart(hit) < 8 &&
+ Stage3end_substrings_queryend(hit) >= querylength - 8) {
+ *completep = true;
+ }
+ hits = List_push(hits,(void *) hit);
+ }
+
+ if ((hit = Stage3end_new_substrings(&(*found_score),antisense_endpoints,antisense_lefts,
+ antisense_nmismatches,antisense_junctions,querylength,query_compress,
+ right_ambig_antisense,left_ambig_antisense,plusp,genestrand,antisense_sensedir,
+ first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
+ Substring_free(&right_ambig_antisense);
+ Substring_free(&left_ambig_antisense);
+ Junction_gc(&antisense_junctions);
+ } else {
+ if (Stage3end_substrings_querystart(hit) < 8 &&
+ Stage3end_substrings_queryend(hit) >= querylength - 8) {
+ *completep = true;
}
- debug(printf("Looking at plus position %u => %d mismatches\n",left,nmismatches));
+ hits = List_push(hits,(void *) hit);
}
}
+ } else if (sense_acceptable_p == true) {
+ if (sense_intronp == true || right_ambig_sense != NULL || left_ambig_sense != NULL) {
+ sensedir = SENSE_FORWARD;
+ } else {
+ sensedir = SENSE_NULL;
+ }
+ if ((hit = Stage3end_new_substrings(&(*found_score),sense_endpoints,sense_lefts,
+ sense_nmismatches,sense_junctions,querylength,query_compress,
+ right_ambig_sense,left_ambig_sense,plusp,genestrand,sensedir,
+ first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
+ Substring_free(&right_ambig_sense);
+ Substring_free(&left_ambig_sense);
+ Junction_gc(&sense_junctions);
+ } else {
+ if (Stage3end_substrings_querystart(hit) < 8 &&
+ Stage3end_substrings_queryend(hit) >= querylength - 8) {
+ *completep = true;
+ }
+ hits = List_push(hits,(void *) hit);
+ }
+
+ Substring_free(&right_ambig_antisense);
+ Substring_free(&left_ambig_antisense);
+ Junction_gc(&antisense_junctions);
+
+ } else if (antisense_acceptable_p == true) {
+ if (antisense_intronp == true || right_ambig_antisense != NULL || left_ambig_antisense != NULL) {
+ sensedir = SENSE_ANTI;
+ } else {
+ sensedir = SENSE_NULL;
+ }
+ if ((hit = Stage3end_new_substrings(&(*found_score),antisense_endpoints,antisense_lefts,
+ antisense_nmismatches,antisense_junctions,querylength,query_compress,
+ right_ambig_antisense,left_ambig_antisense,plusp,genestrand,sensedir,
+ first_read_p,chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
+ Substring_free(&right_ambig_antisense);
+ Substring_free(&left_ambig_antisense);
+ Junction_gc(&antisense_junctions);
+ } else {
+ if (Stage3end_substrings_querystart(hit) < 8 &&
+ Stage3end_substrings_queryend(hit) >= querylength - 8) {
+ *completep = true;
+ }
+ hits = List_push(hits,(void *) hit);
+ }
+
+ Substring_free(&right_ambig_sense);
+ Substring_free(&left_ambig_sense);
+ Junction_gc(&sense_junctions);
+
+ } else {
+ /* Neither set of junctions/endpoints works */
+ Substring_free(&right_ambig_sense);
+ Substring_free(&left_ambig_sense);
+ Substring_free(&right_ambig_antisense);
+ Substring_free(&left_ambig_antisense);
+
+ Junction_gc(&sense_junctions);
+ Junction_gc(&antisense_junctions);
+ }
+
+
+ Intlist_free(&sense_nmismatches);
+ Intlist_free(&antisense_nmismatches);
+ Uintlist_free(&sense_lefts);
+ Uintlist_free(&antisense_lefts);
+ Intlist_free(&sense_endpoints);
+ Intlist_free(&antisense_endpoints);
+
+ for (p = super_path; p != NULL; p = List_next(p)) {
+ diagonal = (Univdiag_T) List_head(p);
+ Univdiag_free(&diagonal);
+ }
+ List_free(&super_path);
+
+ return hits;
+}
+
+
+
+
+List_T
+Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int querylength,
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
+ int nmisses_allowed, int genestrand, bool first_read_p) {
+ List_T hits = NULL;
+ List_T plus_set, minus_set, p;
+ List_T rightward_set_plus = NULL, leftward_set_plus = NULL, rightward_set_minus = NULL, leftward_set_minus = NULL;
+ Elt_T best_plus_elt, best_minus_elt, elt, *plus_elt_array, *minus_elt_array;
+ UINT4 best_plus_nmatches, best_minus_nmatches, nmatches;
+ Sarrayptr_T initptr, finalptr;
+ bool successp, completep;
+ int plus_querypos, minus_querypos, halfwaypos;
+ int i;
+ Chrnum_T chrnum;
+ Univcoord_T chroffset, chrhigh, left;
+ Chrpos_T chrlength;
+ T plus_sarray, minus_sarray;
+ char *plus_conversion, *minus_conversion;
+
+ int nseeds_plus, nseeds_minus;
+ int *scores_plus = NULL, *scores_minus = NULL;
+ int niter, best_plus_i, best_minus_i, nplus, nminus;
+ int best_score;
+ List_T *middle_path_plus = NULL, *right_paths_plus = NULL, *left_paths_plus = NULL,
+ *middle_path_minus = NULL, *right_paths_minus = NULL, *left_paths_minus = NULL;
+ Univdiag_T *middle_diagonals_plus = NULL, *middle_diagonals_minus = NULL;
+ List_T *best_right_diagonals_plus = NULL, *best_left_diagonals_plus = NULL,
+ *all_right_diagonals_plus = NULL, *all_left_diagonals_plus = NULL,
+ *fillin_diagonals_plus = NULL, *fillin_diagonals_minus = NULL,
+ *best_right_diagonals_minus = NULL, *best_left_diagonals_minus = NULL,
+ *all_right_diagonals_minus = NULL, *all_left_diagonals_minus = NULL;
+
+ Intlist_T right_endpoints_sense, right_endpoints_antisense,
+ left_endpoints_sense, left_endpoints_antisense;
+ Intlist_T right_queryends_sense, right_queryends_antisense,
+ left_querystarts_sense, left_querystarts_antisense;
+ Uintlist_T right_ambcoords_sense, right_ambcoords_antisense,
+ left_ambcoords_sense, left_ambcoords_antisense;
+ Intlist_T right_amb_knowni_sense, right_amb_knowni_antisense,
+ left_amb_knowni_sense, left_amb_knowni_antisense;
+ Intlist_T right_amb_nmismatchesi_sense, right_amb_nmismatchesi_antisense,
+ right_amb_nmismatchesj_sense, right_amb_nmismatchesj_antisense,
+ left_amb_nmismatchesi_sense, left_amb_nmismatchesi_antisense,
+ left_amb_nmismatchesj_sense, left_amb_nmismatchesj_antisense;
+ Doublelist_T right_amb_probsi_sense, right_amb_probsi_antisense,
+ right_amb_probsj_sense, right_amb_probsj_antisense,
+ left_amb_probsi_sense, left_amb_probsi_antisense,
+ left_amb_probsj_sense, left_amb_probsj_antisense;
+
+ List_T diagonal_path;
+ bool twopartp = false;
+
+ Univdiag_T first_diagonal, last_diagonal, diagonal;
+ List_T low_diagonals, high_diagonals;
+ bool *coveredp;
+ Chrpos_T **mappings, chrstart, chrend;
+ int *npositions, totalpositions = 0;
+ int querystart, queryend, maxnconsecutive = 0;
+ Oligoindex_T oligoindex;
+ bool oned_matrix_p;
+ int indexsize;
+
+
+ if (nmisses_allowed < 0) {
+ nmisses_allowed = 0;
+ }
+ debug(printf("\nStarting Sarray_search_greedy with querylength %d and indexsize %d and nmisses_allowed %d, genestrand %d\n",
+ querylength,sarray_fwd->indexsize,nmisses_allowed,genestrand));
+
+ *found_score = querylength;
+
+ if (genestrand == +2) {
+ plus_conversion = conversion_rev;
+ minus_conversion = conversion_fwd;
+ plus_sarray = sarray_rev;
+ minus_sarray = sarray_fwd;
+ } else {
+ plus_conversion = conversion_fwd;
+ minus_conversion = conversion_rev;
+ plus_sarray = sarray_fwd;
+ minus_sarray = sarray_rev;
+ }
+
+
+ /* I. Race from plus and minus start to end */
+ plus_set = minus_set = (List_T) NULL;
+ best_plus_nmatches = best_minus_nmatches = 0;
+ best_plus_elt = best_minus_elt = (Elt_T) NULL;
+ plus_querypos = 0;
+ minus_querypos = 0;
+ niter = 0;
+ while (niter < nmisses_allowed && plus_querypos < querylength && minus_querypos < querylength) {
+ sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryuc_ptr[plus_querypos]),
+ querylength - plus_querypos,/*queryoffset*/plus_querypos,
+ query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,first_read_p,plus_conversion);
+ elt = Elt_new(plus_querypos,nmatches,initptr,finalptr,/*temporaryp*/false);
+ if (nmatches > best_plus_nmatches && elt->nptr <= MAX_HITS_FOR_BEST_ELT) {
+ best_plus_elt = elt;
+ best_plus_nmatches = nmatches;
+ best_plus_i = niter;
+ }
+ plus_set = List_push(plus_set,elt);
plus_querypos += nmatches;
plus_querypos += 1; /* To skip the presumed mismatch */
- plus_niter++;
- }
- while (minus_querypos < querylength && minus_niter < nmisses_allowed) {
sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryrc[minus_querypos]),
querylength - minus_querypos,/*queryoffset*/minus_querypos,
query_compress_rev,minus_sarray,/*plusp*/false,genestrand,first_read_p,minus_conversion);
- elt = Elt_new(minus_querypos,nmatches,initptr,finalptr);
- minus_set = List_push(minus_set,(void *) elt);
- if (nmatches > best_minus_nmatches) {
+ elt = Elt_new(minus_querypos,nmatches,initptr,finalptr,/*temporaryp*/false);
+ if (nmatches > best_minus_nmatches && elt->nptr < MAX_HITS_FOR_BEST_ELT) {
best_minus_elt = elt;
best_minus_nmatches = nmatches;
-
- /* See if we have a substitution winner */
- Elt_fill_positions_all(best_minus_elt,minus_sarray);
- for (i = 0; i < best_minus_elt->npositions; i++) {
- left = best_minus_elt->positions[i];
- /* Should return max_mismatches + 1 if it exceeds the limit */
- if ((nmismatches = Genome_count_mismatches_limit(query_compress_rev,left,/*pos5*/0,/*pos3*/querylength,
- /*max_mismatches*/nmisses_allowed,
- /*plusp*/false,genestrand,first_read_p)) <= nmisses_allowed) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- debug(printf("3. Reporting hit with %d mismatches vs %d allowed\n",nmismatches,nmisses_allowed));
- if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
- left,/*genomiclength*/querylength,
- query_compress_rev,/*plusp*/false,genestrand,first_read_p,
- chrnum,chroffset,chrhigh,chrlength,
- /*sarrayp*/true)) != NULL) {
- *subs = List_push(*subs,(void *) hit);
- }
- }
- debug(printf("Looking at minus position %u => %d mismatches\n",left,nmismatches));
- }
+ best_minus_i = niter;
}
-
+ minus_set = List_push(minus_set,elt);
minus_querypos += nmatches;
minus_querypos += 1; /* To skip the presumed mismatch */
- minus_niter++;
+
+ niter++;
}
- debug(printf("Ended with %d plus iterations and %d minus iterations\n",plus_niter,minus_niter));
+#ifdef DEBUG
+ printf("niter %d vs %d allowed, plus 0..%d, minus 0..%d\n",niter,nmisses_allowed,plus_querypos,minus_querypos);
+ if (best_plus_elt != NULL) {
+ printf("best plus %d..%d (SA %u+%d)\n",
+ best_plus_elt->querystart,best_plus_elt->queryend,best_plus_elt->initptr,best_plus_elt->finalptr - best_plus_elt->initptr);
+ }
+ if (best_minus_elt != NULL) {
+ printf("best minus %d..%d (SA %u+%d)\n",
+ best_minus_elt->querystart,best_minus_elt->queryend,best_minus_elt->initptr,best_minus_elt->finalptr - best_minus_elt->initptr);
+ }
+ printf("plus set (positions not yet filled):\n");
+ for (p = plus_set; p != NULL; p = List_next(p)) {
+ Elt_dump((Elt_T) List_head(p));
+ }
+ printf("\n");
+ printf("minus set (positions not yet filled):\n");
+ for (p = minus_set; p != NULL; p = List_next(p)) {
+ Elt_dump((Elt_T) List_head(p));
+ }
+#endif
- if (plus_querypos >= querylength) {
- /* Handle plus extensions around best elt */
- debug(printf("BEST PLUS:\n"));
- debug(Elt_dump(best_plus_elt));
+ if (plus_querypos < querylength) {
+ debug(printf("Plus: could not find large pieces\n"));
+ nseeds_plus = 0;
- leftward_set = rightward_set = (List_T) NULL;
- for (p = plus_set; p != NULL; p = p->rest) {
- elt = (Elt_T) p->first;
- if (elt == best_plus_elt) {
- /* Skip */
+ } else if (best_plus_elt == NULL) {
+ debug(printf("Plus: No best elt\n"));
+ nseeds_plus = 0;
- } else if (elt->queryend < best_plus_elt->querystart) {
- leftward_set = List_push(leftward_set,(void *) elt);
+ } else {
+ Elt_fill_positions_all(best_plus_elt,plus_sarray);
+ if (best_plus_elt->npositions == 0) {
+ /* Could happen if there are too many positions */
+ debug(printf("Plus: Best elt has no positions\n"));
+ nseeds_plus = 0;
- } else if (elt->querystart > best_plus_elt->queryend) {
- rightward_set = List_push(rightward_set,(void *) elt);
+ } else {
+ plus_set = List_reverse(plus_set);
+ plus_elt_array = (Elt_T *) List_to_array_n(&nplus,plus_set);
- } else {
- /* Duplicate -- skip */
+#ifdef DEBUG
+ printf("LEFT\n");
+ for (i = 0; i < best_plus_i; i++) {
+ Elt_dump(plus_elt_array[i]);
}
- }
+ printf("MIDDLE\n");
+ Elt_dump(plus_elt_array[best_plus_i]);
+ printf("RIGHT\n");
+ for (i = best_plus_i + 1; i < nplus; i++) {
+ Elt_dump(plus_elt_array[i]);
+ }
+#endif
- if ((nelts = List_length(rightward_set)) > 0) {
- array = (Elt_T *) MALLOCA(nelts * sizeof(Elt_T));
- List_fill_array_and_free((void **) array,&rightward_set);
- rightward_set = (List_T) NULL;
-
- qsort(array,nelts,sizeof(Elt_T),Elt_querypos_ascending_cmp);
- for (i = nelts-1; i >= 0; --i) {
- rightward_set = List_push(rightward_set,(void *) array[i]);
+ nseeds_plus = best_plus_elt->npositions;
+ scores_plus = (int *) MALLOC(nseeds_plus*sizeof(int));
+ /* Assigned only if score is high */
+ middle_path_plus = (List_T *) CALLOC(nseeds_plus,sizeof(List_T));
+ right_paths_plus = (List_T *) CALLOC(nseeds_plus,sizeof(List_T));
+ left_paths_plus = (List_T *) CALLOC(nseeds_plus,sizeof(List_T));
+
+ middle_diagonals_plus = (Univdiag_T *) MALLOC(nseeds_plus*sizeof(Univdiag_T));
+ best_right_diagonals_plus = (List_T *) MALLOC(nseeds_plus*sizeof(List_T));
+ best_left_diagonals_plus = (List_T *) MALLOC(nseeds_plus*sizeof(List_T));
+ all_right_diagonals_plus = (List_T *) MALLOC(nseeds_plus*sizeof(List_T));
+ all_left_diagonals_plus = (List_T *) MALLOC(nseeds_plus*sizeof(List_T));
+ fillin_diagonals_plus = (List_T *) CALLOC(nseeds_plus,sizeof(List_T));
+
+ chrhigh = 0;
+ for (i = 0; i < nseeds_plus; i++) {
+ left = best_plus_elt->positions[i];
+ if (left > chrhigh) {
+ chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ /* *chrhigh += 1U; */
+ }
+ /* May not want to solve for best_right_diagonals and best_left_diagonals. Use oligoindex instead. */
+ scores_plus[i] = get_diagonals(&(middle_diagonals_plus[i]),
+ &(best_right_diagonals_plus[i]),&(best_left_diagonals_plus[i]),
+ &(all_right_diagonals_plus[i]),&(all_left_diagonals_plus[i]),
+ plus_sarray,/*queryptr*/queryuc_ptr,querylength,query_compress_fwd,
+ chroffset,chrhigh,chrlength,/*goal*/left,plus_elt_array,
+ best_plus_i,nplus,/*plusp*/true,genestrand,first_read_p,
+ plus_conversion,oligoindices_minor,diagpool);
+ debug(printf("Got plus score %d\n",scores_plus[i]));
}
- FREEA(array);
+
+ FREE(plus_elt_array);
}
+ }
- if ((nelts = List_length(leftward_set)) > 0) {
- array = (Elt_T *) MALLOCA(nelts * sizeof(Elt_T));
- List_fill_array_and_free((void **) array,&leftward_set);
- leftward_set = (List_T) NULL;
+ if (minus_querypos < querylength) {
+ debug(printf("Minus: Could not find large pieces\n"));
+ nseeds_minus = 0;
- qsort(array,nelts,sizeof(Elt_T),Elt_querypos_descending_cmp);
- for (i = nelts-1; i >= 0; --i) {
- leftward_set = List_push(leftward_set,(void *) array[i]);
- }
- FREEA(array);
- }
+ } else if (best_minus_elt == NULL) {
+ debug(printf("Minus: No best elt\n"));
+ nseeds_minus = 0;
+ } else {
+ Elt_fill_positions_all(best_minus_elt,minus_sarray);
+ if (best_minus_elt->npositions == 0) {
+ /* Could happen if there are too many positions */
+ debug(printf("Minus: Best elt has no positions\n"));
+ nseeds_minus = 0;
- chrhigh = 0U;
- Elt_fill_positions_all(best_plus_elt,plus_sarray);
- for (i = 0; i < best_plus_elt->npositions; i++) {
- left = best_plus_elt->positions[i];
- if (left > chrhigh) {
- chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* *chrhigh += 1U; */
+ } else {
+ minus_set = List_reverse(minus_set);
+ minus_elt_array = (Elt_T *) List_to_array_n(&nminus,minus_set);
+
+#ifdef DEBUG
+ printf("LEFT\n");
+ for (i = 0; i < best_minus_i; i++) {
+ Elt_dump(minus_elt_array[i]);
}
- if (extend_rightward(/*goal*/left,chroffset,chrhigh,rightward_set,
- query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,first_read_p,
- best_plus_elt->queryend) == true) {
- nmatches = Genome_consecutive_matches_leftward(query_compress_fwd,left,
- /*pos5*/0,/*pos3*/best_plus_elt->querystart,
- /*plusp*/true,genestrand,first_read_p);
- debug(printf(" extending bestelt querystart %d leftward by %d matches\n",best_plus_elt->querystart,nmatches));
- best_plus_elt->querystart -= nmatches;
- if (extend_leftward(/*goal*/left,chroffset,chrhigh,leftward_set,
- /*queryptr*/queryuc_ptr,query_compress_fwd,
- plus_sarray,/*plusp*/true,genestrand,first_read_p,plus_conversion,
- best_plus_elt->querystart,best_plus_elt->queryend) == true) {
- collect_elt_matches(&(*found_score),&(*subs),&(*indels),&(*ambiguous),&(*singlesplicing),&(*doublesplicing),
- best_plus_elt->querystart,best_plus_elt->queryend,
- chrnum,chroffset,chrhigh,chrlength,
- /*goal*/left,rightward_set,leftward_set,
- querylength,query_compress_fwd,/*plusp*/true,genestrand,first_read_p,
- nmisses_allowed);
+ printf("MIDDLE\n");
+ Elt_dump(minus_elt_array[best_minus_i]);
+ printf("RIGHT\n");
+ for (i = best_minus_i + 1; i < nminus; i++) {
+ Elt_dump(minus_elt_array[i]);
+ }
+#endif
+
+ nseeds_minus = best_minus_elt->npositions;
+ scores_minus = (int *) MALLOC(nseeds_minus*sizeof(int));
+ /* Assigned only if score is high */
+ middle_path_minus = (List_T *) CALLOC(nseeds_minus,sizeof(List_T));
+ right_paths_minus = (List_T *) CALLOC(nseeds_minus,sizeof(List_T));
+ left_paths_minus = (List_T *) CALLOC(nseeds_minus,sizeof(List_T));
+
+ middle_diagonals_minus = (Univdiag_T *) MALLOC(nseeds_minus*sizeof(Univdiag_T));
+ best_right_diagonals_minus = (List_T *) MALLOC(nseeds_minus*sizeof(List_T));
+ best_left_diagonals_minus = (List_T *) MALLOC(nseeds_minus*sizeof(List_T));
+ all_right_diagonals_minus = (List_T *) MALLOC(nseeds_minus*sizeof(List_T));
+ all_left_diagonals_minus = (List_T *) MALLOC(nseeds_minus*sizeof(List_T));
+ fillin_diagonals_minus = (List_T *) CALLOC(nseeds_minus,sizeof(List_T));
+
+ chrhigh = 0;
+ for (i = 0; i < nseeds_minus; i++) {
+ left = best_minus_elt->positions[i];
+ if (left > chrhigh) {
+ chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ /* *chrhigh += 1U; */
}
- best_plus_elt->querystart += nmatches;
+ /* May not want to solve for best_right_diagonals and best_left_diagonals. Use oligoindex instead. */
+ scores_minus[i] = get_diagonals(&(middle_diagonals_minus[i]),
+ &(best_right_diagonals_minus[i]),&(best_left_diagonals_minus[i]),
+ &(all_right_diagonals_minus[i]),&(all_left_diagonals_minus[i]),
+ minus_sarray,/*queryptr*/queryrc,querylength,query_compress_rev,
+ chroffset,chrhigh,chrlength,/*goal*/left,minus_elt_array,
+ best_minus_i,nminus,/*plusp*/false,genestrand,first_read_p,
+ minus_conversion,oligoindices_minor,diagpool);
+ debug(printf("Got minus score %d\n",scores_minus[i]));
}
+
+ FREE(minus_elt_array);
}
+ }
- List_free(&rightward_set);
- List_free(&leftward_set);
+#if 0
+ /* Because we don't always left-extend, we cannot trust best_score */
+ best_score = 0;
+ for (i = 0; i < nseeds_plus; i++) {
+ if (scores_plus[i] > best_score) {
+ best_score = scores_plus[i];
+ }
+ }
+ for (i = 0; i < nseeds_minus; i++) {
+ if (scores_minus[i] > best_score) {
+ best_score = scores_minus[i];
+ }
}
+#endif
- if (minus_querypos >= querylength) {
- /* Handle minus extensions around best elt */
- debug(printf("BEST MINUS:\n"));
- debug(Elt_dump(best_minus_elt));
+ debug(printf("Have %d nseeds_plus and %d nseeds_minus\n",nseeds_plus,nseeds_minus));
- leftward_set = rightward_set = (List_T) NULL;
- for (p = minus_set; p != NULL; p = p->rest) {
- elt = (Elt_T) p->first;
- if (elt == best_minus_elt) {
- /* Skip */
+ coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
+ mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
+ npositions = (int *) CALLOCA(querylength,sizeof(int));
+ oligoindex = Oligoindex_array_elt(oligoindices_minor,/*source*/0);
+ indexsize = Oligoindex_indexsize(oligoindex);
+
+ /* *sarray_gmap = (List_T) NULL; */
- } else if (elt->queryend < best_minus_elt->querystart) {
- leftward_set = List_push(leftward_set,(void *) elt);
+ chrhigh = 0;
+ for (i = 0; i < nseeds_plus; i++) {
+ if (1 /*|| scores_plus[i] > best_score - 20*/) {
+ diagonal = middle_diagonals_plus[i];
+ left = diagonal->univdiagonal;
+ if (left > chrhigh) {
+ chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ /* *chrhigh += 1U; */
+ }
+ middle_path_plus[i] = find_best_path(&(right_paths_plus[i]),&right_endpoints_sense,&right_endpoints_antisense,
+ &right_queryends_sense,&right_queryends_antisense,
+ &right_ambcoords_sense,&right_ambcoords_antisense,
+ &right_amb_knowni_sense,&right_amb_knowni_antisense,
+ &right_amb_nmismatchesi_sense,&right_amb_nmismatchesi_antisense,
+ &right_amb_nmismatchesj_sense,&right_amb_nmismatchesj_antisense,
+ &right_amb_probsi_sense,&right_amb_probsi_antisense,
+ &right_amb_probsj_sense,&right_amb_probsj_antisense,
+ &(left_paths_plus[i]),&left_endpoints_sense,&left_endpoints_antisense,
+ &left_querystarts_sense,&left_querystarts_antisense,
+ &left_ambcoords_sense,&left_ambcoords_antisense,
+ &left_amb_knowni_sense,&left_amb_knowni_antisense,
+ &left_amb_nmismatchesi_sense,&left_amb_nmismatchesi_antisense,
+ &left_amb_nmismatchesj_sense,&left_amb_nmismatchesj_antisense,
+ &left_amb_probsi_sense,&left_amb_probsi_antisense,
+ &left_amb_probsj_sense,&left_amb_probsj_antisense,
+ &(fillin_diagonals_plus[i]),diagonal,best_right_diagonals_plus[i],best_left_diagonals_plus[i],
+ /*queryptr*/queryuc_ptr,querylength,query_compress_fwd,chroffset,chrhigh,
+ oligoindices_minor,diagpool,/*plusp*/true,genestrand,first_read_p);
+
+ hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_plus[i],
+ right_endpoints_sense,right_endpoints_antisense,
+ right_queryends_sense,right_queryends_antisense,
+ right_ambcoords_sense,right_ambcoords_antisense,
+ right_amb_knowni_sense,right_amb_knowni_antisense,
+ right_amb_nmismatchesi_sense,right_amb_nmismatchesi_antisense,
+ right_amb_nmismatchesj_sense,right_amb_nmismatchesj_antisense,
+ right_amb_probsi_sense,right_amb_probsi_antisense,
+ right_amb_probsj_sense,right_amb_probsj_antisense,
+
+ left_endpoints_sense,left_endpoints_antisense,
+ left_querystarts_sense,left_querystarts_antisense,
+ left_ambcoords_sense,left_ambcoords_antisense,
+ left_amb_knowni_sense,left_amb_knowni_antisense,
+ left_amb_nmismatchesi_sense,left_amb_nmismatchesi_antisense,
+ left_amb_nmismatchesj_sense,left_amb_nmismatchesj_antisense,
+ left_amb_probsi_sense,left_amb_probsi_antisense,
+ left_amb_probsj_sense,left_amb_probsj_antisense,
- } else if (elt->querystart > best_minus_elt->queryend) {
- rightward_set = List_push(rightward_set,(void *) elt);
+ chrnum,chroffset,chrhigh,chrlength,
+ querylength,query_compress_fwd,/*plusp*/true,genestrand,first_read_p);
- } else {
- /* Duplicate -- skip */
+#if 0
+ if (0 && completep == false) {
+ *sarray_gmap = run_gmap_plus(*sarray_gmap,middle_path_plus[i],/*start_paths*/left_paths_plus[i],/*end_paths*/right_paths_plus[i],
+ chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength,
+ genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool);
}
+#endif
+
+ Intlist_free(&right_endpoints_sense); Intlist_free(&right_endpoints_antisense);
+ Intlist_free(&right_queryends_sense); Intlist_free(&right_queryends_antisense);
+ Uintlist_free(&right_ambcoords_sense); Uintlist_free(&right_ambcoords_antisense);
+ Intlist_free(&right_amb_knowni_sense); Intlist_free(&right_amb_knowni_antisense);
+ Intlist_free(&right_amb_nmismatchesi_sense); Intlist_free(&right_amb_nmismatchesi_antisense);
+ Intlist_free(&right_amb_nmismatchesj_sense); Intlist_free(&right_amb_nmismatchesj_antisense);
+ Doublelist_free(&right_amb_probsi_sense); Doublelist_free(&right_amb_probsi_antisense);
+ Doublelist_free(&right_amb_probsj_sense); Doublelist_free(&right_amb_probsj_antisense);
+
+ Intlist_free(&left_endpoints_sense); Intlist_free(&left_endpoints_antisense);
+ Intlist_free(&left_querystarts_sense); Intlist_free(&left_querystarts_antisense);
+ Uintlist_free(&left_ambcoords_sense); Uintlist_free(&left_ambcoords_antisense);
+ Intlist_free(&left_amb_knowni_sense); Intlist_free(&left_amb_knowni_antisense);
+ Intlist_free(&left_amb_nmismatchesi_sense); Intlist_free(&left_amb_nmismatchesi_antisense);
+ Intlist_free(&left_amb_nmismatchesj_sense); Intlist_free(&left_amb_nmismatchesj_antisense);
+ Doublelist_free(&left_amb_probsi_sense); Doublelist_free(&left_amb_probsi_antisense);
+ Doublelist_free(&left_amb_probsj_sense); Doublelist_free(&left_amb_probsj_antisense);
}
+ }
- if ((nelts = List_length(rightward_set)) > 0) {
- array = (Elt_T *) MALLOCA(nelts * sizeof(Elt_T));
- List_fill_array_and_free((void **) array,&rightward_set);
- rightward_set = (List_T) NULL;
-
- qsort(array,nelts,sizeof(Elt_T),Elt_querypos_ascending_cmp);
- for (i = nelts-1; i >= 0; --i) {
- rightward_set = List_push(rightward_set,(void *) array[i]);
+ chrhigh = 0;
+ for (i = 0; i < nseeds_minus; i++) {
+ if (1 /*|| scores_minus[i] > best_score - 20*/) {
+ diagonal = middle_diagonals_minus[i];
+ left = diagonal->univdiagonal;
+ if (left > chrhigh) {
+ chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ /* *chrhigh += 1U; */
}
- FREEA(array);
+ middle_path_minus[i] = find_best_path(&(right_paths_minus[i]),&right_endpoints_sense,&right_endpoints_antisense,
+ &right_queryends_sense,&right_queryends_antisense,
+ &right_ambcoords_sense,&right_ambcoords_antisense,
+ &right_amb_knowni_sense,&right_amb_knowni_antisense,
+ &right_amb_nmismatchesi_sense,&right_amb_nmismatchesi_antisense,
+ &right_amb_nmismatchesj_sense,&right_amb_nmismatchesj_antisense,
+ &right_amb_probsi_sense,&right_amb_probsi_antisense,
+ &right_amb_probsj_sense,&right_amb_probsj_antisense,
+ &(left_paths_minus[i]),&left_endpoints_sense,&left_endpoints_antisense,
+ &left_querystarts_sense,&left_querystarts_antisense,
+ &left_ambcoords_sense,&left_ambcoords_antisense,
+ &left_amb_knowni_sense,&left_amb_knowni_antisense,
+ &left_amb_nmismatchesi_sense,&left_amb_nmismatchesi_antisense,
+ &left_amb_nmismatchesj_sense,&left_amb_nmismatchesj_antisense,
+ &left_amb_probsi_sense,&left_amb_probsi_antisense,
+ &left_amb_probsj_sense,&left_amb_probsj_antisense,
+ &(fillin_diagonals_minus[i]),diagonal,best_right_diagonals_minus[i],best_left_diagonals_minus[i],
+ /*queryptr*/queryrc,querylength,query_compress_rev,chroffset,chrhigh,
+ oligoindices_minor,diagpool,/*plusp*/false,genestrand,first_read_p);
+
+ hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_minus[i],
+ right_endpoints_sense,right_endpoints_antisense,
+ right_queryends_sense,right_queryends_antisense,
+ right_ambcoords_sense,right_ambcoords_antisense,
+ right_amb_knowni_sense,right_amb_knowni_antisense,
+ right_amb_nmismatchesi_sense,right_amb_nmismatchesi_antisense,
+ right_amb_nmismatchesj_sense,right_amb_nmismatchesj_antisense,
+ right_amb_probsi_sense,right_amb_probsi_antisense,
+ right_amb_probsj_sense,right_amb_probsj_antisense,
+
+ left_endpoints_sense,left_endpoints_antisense,
+ left_querystarts_sense,left_querystarts_antisense,
+ left_ambcoords_sense,left_ambcoords_antisense,
+ left_amb_knowni_sense,left_amb_knowni_antisense,
+ left_amb_nmismatchesi_sense,left_amb_nmismatchesi_antisense,
+ left_amb_nmismatchesj_sense,left_amb_nmismatchesj_antisense,
+ left_amb_probsi_sense,left_amb_probsi_antisense,
+ left_amb_probsj_sense,left_amb_probsj_antisense,
+
+ chrnum,chroffset,chrhigh,chrlength,
+ querylength,query_compress_rev,/*plusp*/false,genestrand,first_read_p);
+
+#if 0
+ if (0 && completep == false) {
+ *sarray_gmap = run_gmap_minus(*sarray_gmap,middle_path_minus[i],/*start_paths*/right_paths_minus[i],/*end_paths*/left_paths_minus[i],
+ chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength,
+ genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool);
+ }
+#endif
+
+ Intlist_free(&right_endpoints_sense); Intlist_free(&right_endpoints_antisense);
+ Intlist_free(&right_queryends_sense); Intlist_free(&right_queryends_antisense);
+ Uintlist_free(&right_ambcoords_sense); Uintlist_free(&right_ambcoords_antisense);
+ Intlist_free(&right_amb_knowni_sense); Intlist_free(&right_amb_knowni_antisense);
+ Intlist_free(&right_amb_nmismatchesi_sense); Intlist_free(&right_amb_nmismatchesi_antisense);
+ Intlist_free(&right_amb_nmismatchesj_sense); Intlist_free(&right_amb_nmismatchesj_antisense);
+ Doublelist_free(&right_amb_probsi_sense); Doublelist_free(&right_amb_probsi_antisense);
+ Doublelist_free(&right_amb_probsj_sense); Doublelist_free(&right_amb_probsj_antisense);
+
+ Intlist_free(&left_endpoints_sense); Intlist_free(&left_endpoints_antisense);
+ Intlist_free(&left_querystarts_sense); Intlist_free(&left_querystarts_antisense);
+ Uintlist_free(&left_ambcoords_sense); Uintlist_free(&left_ambcoords_antisense);
+ Intlist_free(&left_amb_knowni_sense); Intlist_free(&left_amb_knowni_antisense);
+ Intlist_free(&left_amb_nmismatchesi_sense); Intlist_free(&left_amb_nmismatchesi_antisense);
+ Intlist_free(&left_amb_nmismatchesj_sense); Intlist_free(&left_amb_nmismatchesj_antisense);
+ Doublelist_free(&left_amb_probsi_sense); Doublelist_free(&left_amb_probsi_antisense);
+ Doublelist_free(&left_amb_probsj_sense); Doublelist_free(&left_amb_probsj_antisense);
+
}
+ }
- if ((nelts = List_length(leftward_set)) > 0) {
- array = (Elt_T *) MALLOCA(nelts * sizeof(Elt_T));
- List_fill_array_and_free((void **) array,&leftward_set);
- leftward_set = (List_T) NULL;
-
- qsort(array,nelts,sizeof(Elt_T),Elt_querypos_descending_cmp);
- for (i = nelts-1; i >= 0; --i) {
- leftward_set = List_push(leftward_set,(void *) array[i]);
+
+#if 0
+ /* Salvage using gmap */
+ chrhigh = 0;
+ for (i = 0; i < nseeds_plus; i++) {
+ if (incomplete_result_p(middle_path_plus[i],querylength) == true) {
+ left = best_plus_elt->positions[i];
+ if (left > chrhigh) {
+ chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ /* chrhigh += 1U; */
}
- FREEA(array);
+ *sarray_gmap = run_gmap_plus(*sarray_gmap,middle_path_plus[i],/*start_paths*/left_paths_plus[i],/*end_paths*/right_paths_plus[i],
+ chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength,
+ genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool);
}
+ }
- chrhigh = 0U;
- Elt_fill_positions_all(best_minus_elt,minus_sarray);
- for (i = 0; i < best_minus_elt->npositions; i++) {
+ chrhigh = 0;
+ for (i = 0; i < nseeds_minus; i++) {
+ if (incomplete_result_p(middle_path_minus[i],querylength) == true) {
left = best_minus_elt->positions[i];
if (left > chrhigh) {
chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* *chrhigh += 1U; */
+ /* chrhigh += 1U; */
}
- if (extend_rightward(/*goal*/left,chroffset,chrhigh,rightward_set,
- query_compress_rev,minus_sarray,/*plusp*/false,genestrand,first_read_p,
- best_minus_elt->queryend) == true) {
- nmatches = Genome_consecutive_matches_leftward(query_compress_rev,left,
- /*pos5*/0,/*pos3*/best_minus_elt->querystart,
- /*plusp*/false,genestrand,first_read_p);
- debug(printf(" extending bestelt querystart %d leftward by %d matches\n",best_minus_elt->querystart,nmatches));
- best_minus_elt->querystart -= nmatches;
- if (extend_leftward(/*goal*/left,chroffset,chrhigh,leftward_set,
- /*queryptr*/queryrc,query_compress_rev,
- minus_sarray,/*plusp*/false,genestrand,first_read_p,minus_conversion,
- best_minus_elt->querystart,best_minus_elt->queryend) == true) {
- collect_elt_matches(&(*found_score),&(*subs),&(*indels),&(*ambiguous),&(*singlesplicing),&(*doublesplicing),
- best_minus_elt->querystart,best_minus_elt->queryend,
- chrnum,chroffset,chrhigh,chrlength,
- /*goal*/left,rightward_set,leftward_set,
- querylength,query_compress_rev,/*plusp*/false,genestrand,first_read_p,
- nmisses_allowed);
- }
- best_minus_elt->querystart += nmatches;
+ *sarray_gmap = run_gmap_minus(*sarray_gmap,middle_path_minus[i],/*start_paths*/right_paths_minus[i],/*end_paths*/left_paths_minus[i],
+ chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength,
+ genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool);
+ }
+ }
+#endif
+
+
+ if (nseeds_minus > 0) {
+ FREE(scores_minus);
+ for (i = 0; i < nseeds_minus; i++) {
+ for (p = right_paths_minus[i]; p != NULL; p = List_next(p)) {
+ diagonal_path = (List_T) List_head(p);
+ List_free(&diagonal_path);
+ }
+ for (p = left_paths_minus[i]; p != NULL; p = List_next(p)) {
+ diagonal_path = (List_T) List_head(p);
+ List_free(&diagonal_path);
}
+ List_free(&(middle_path_minus[i]));
+ List_free(&(left_paths_minus[i]));
+ List_free(&(right_paths_minus[i]));
+
+
+ Univdiag_free(&(middle_diagonals_minus[i]));
+ List_free(&(best_right_diagonals_minus[i]));
+ List_free(&(best_left_diagonals_minus[i]));
+ Univdiag_gc(&(all_right_diagonals_minus[i]));
+ Univdiag_gc(&(all_left_diagonals_minus[i]));
+ Univdiag_gc(&(fillin_diagonals_minus[i]));
}
+ FREE(middle_diagonals_minus);
+ FREE(best_right_diagonals_minus);
+ FREE(best_left_diagonals_minus);
+ FREE(all_right_diagonals_minus);
+ FREE(all_left_diagonals_minus);
+ FREE(fillin_diagonals_minus);
+
+ FREE(middle_path_minus);
+ FREE(right_paths_minus);
+ FREE(left_paths_minus);
+ }
- List_free(&rightward_set);
- List_free(&leftward_set);
+ if (nseeds_plus > 0) {
+ FREE(scores_plus);
+ for (i = 0; i < nseeds_plus; i++) {
+ for (p = right_paths_plus[i]; p != NULL; p = List_next(p)) {
+ diagonal_path = (List_T) List_head(p);
+ List_free(&diagonal_path);
+ }
+ for (p = left_paths_plus[i]; p != NULL; p = List_next(p)) {
+ diagonal_path = (List_T) List_head(p);
+ List_free(&diagonal_path);
+ }
+ List_free(&(middle_path_plus[i]));
+ List_free(&(left_paths_plus[i]));
+ List_free(&(right_paths_plus[i]));
+
+ Univdiag_free(&(middle_diagonals_plus[i]));
+ List_free(&(best_right_diagonals_plus[i]));
+ List_free(&(best_left_diagonals_plus[i]));
+ Univdiag_gc(&(all_right_diagonals_plus[i]));
+ Univdiag_gc(&(all_left_diagonals_plus[i]));
+ Univdiag_gc(&(fillin_diagonals_plus[i]));
+ }
+ FREE(middle_diagonals_plus);
+ FREE(best_right_diagonals_plus);
+ FREE(best_left_diagonals_plus);
+ FREE(all_right_diagonals_plus);
+ FREE(all_left_diagonals_plus);
+ FREE(fillin_diagonals_plus);
+
+ FREE(middle_path_plus);
+ FREE(right_paths_plus);
+ FREE(left_paths_plus);
}
+ List_free(&leftward_set_minus);
+ List_free(&rightward_set_minus);
+ List_free(&leftward_set_plus);
+ List_free(&rightward_set_plus);
+
for (p = plus_set; p != NULL; p = p->rest) {
elt = (Elt_T) p->first;
Elt_free(&elt);
@@ -4896,9 +8388,8 @@ Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *am
}
List_free(&minus_set);
- debug(printf("Found %d subs, %d indels, %d singlesplices, %d doublesplices\n",
- List_length(*subs),List_length(*indels),List_length(*singlesplicing),List_length(*doublesplicing)));
+ debug(printf("Found %d hits\n",List_length(hits)));
- return;
+ return hits;
}
diff --git a/src/sarray-read.h b/src/sarray-read.h
index e4b7a3c..b8ca703 100644
--- a/src/sarray-read.h
+++ b/src/sarray-read.h
@@ -1,4 +1,4 @@
-/* $Id: sarray-read.h 136085 2014-05-13 23:00:04Z twu $ */
+/* $Id: sarray-read.h 166785 2015-06-02 17:58:27Z twu $ */
#ifndef SARRAY_READ_INCLUDED
#define SARRAY_READ_INCLUDED
#include "access.h"
@@ -27,17 +27,22 @@ Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
Chrpos_T *splicedists_in, int nsplicesites_in);
+extern void
+Sarray_shmem_remove (char *dir, char *fileroot, char *snps_root, Mode_T mode, bool fwdp);
+
extern T
-Sarray_new (char *directory, char *fileroot, char *snps_root, Access_mode_T sarray_access, Access_mode_T aux_access,
- Mode_T mode, bool fwdp);
+Sarray_new (char *dir, char *fileroot, char *snps_root, Access_mode_T sarray_access, Access_mode_T lcp_access,
+ Access_mode_T guideexc_access, Access_mode_T indexij_access, bool sharedp, Mode_T mode, bool fwdp);
extern void
Sarray_free (T *old);
-extern void
-Sarray_search_greedy (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous, List_T *singlesplicing,
- List_T *doublesplicing, char *queryuc_ptr, char *queryrc, int querylength,
- Compress_T query_compress_fwd, Compress_T query_compress_rev, int nmisses_allowed,
- int genestrand, bool first_read_p);
+extern List_T
+Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int querylength,
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ int maxpeelback, Pairpool_T pairpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
+ int nmisses_allowed, int genestrand, bool first_read_p);
#undef T
#endif
diff --git a/src/sarray-write.c b/src/sarray-write.c
index 68c6d20..f9e75dc 100644
--- a/src/sarray-write.c
+++ b/src/sarray-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-write.c 151046 2014-10-16 19:08:41Z twu $";
+static char rcsid[] = "$Id: sarray-write.c 167266 2015-06-11 00:07:57Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -9,6 +9,8 @@ static char rcsid[] = "$Id: sarray-write.c 151046 2014-10-16 19:08:41Z twu $";
#include <stdio.h>
#include <string.h>
#include <sys/mman.h> /* For munmap */
+#include <math.h> /* For rint */
+
#include "bool.h"
#include "access.h"
#include "mem.h"
@@ -74,16 +76,17 @@ static char rcsid[] = "$Id: sarray-write.c 151046 2014-10-16 19:08:41Z twu $";
/* #define READ_SA_FROM_FILE 1 */
#define MONITOR_INTERVAL 100000000 /* 100 million nt */
+#define RW_BATCH 10000000 /* 10 million elements */
/* For standard genome */
void
Sarray_write_array (char *sarrayfile, Genome_T genomecomp, UINT4 genomelength) {
UINT4 *SA;
- UINT4 n = genomelength;
+ UINT4 n = genomelength, ii;
unsigned char *gbuffer;
FILE *fp;
-
-
+ void *p;
+
SA = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
gbuffer = (unsigned char *) CALLOC(n+1,sizeof(unsigned char));
Genome_fill_buffer_int_string(genomecomp,/*left*/0,/*length*/n,gbuffer,/*conversion*/NULL);
@@ -94,7 +97,18 @@ Sarray_write_array (char *sarrayfile, Genome_T genomecomp, UINT4 genomelength) {
fprintf(stderr,"Can't write to file %s\n",sarrayfile);
exit(9);
} else {
+#if 0
FWRITE_UINTS(SA,n+1,fp);
+#else
+ for (ii = 0; ii + RW_BATCH <= n; ii += RW_BATCH) {
+ p = (void *) &(SA[ii]);
+ FWRITE_UINTS(p,RW_BATCH,fp);
+ }
+ if (ii <= n) {
+ p = (void *) &(SA[ii]);
+ FWRITE_UINTS(p,n - ii + 1,fp);
+ }
+#endif
fclose(fp);
}
@@ -108,8 +122,9 @@ Sarray_write_array (char *sarrayfile, Genome_T genomecomp, UINT4 genomelength) {
void
Sarray_write_array_from_genome (char *sarrayfile, unsigned char *gbuffer, UINT4 genomelength) {
UINT4 *SA;
- UINT4 n = genomelength;
+ UINT4 n = genomelength, ii;
FILE *fp;
+ void *p;
SA = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
@@ -119,7 +134,18 @@ Sarray_write_array_from_genome (char *sarrayfile, unsigned char *gbuffer, UINT4
fprintf(stderr,"Can't write to file %s\n",sarrayfile);
exit(9);
} else {
+#if 0
FWRITE_UINTS(SA,n+1,fp);
+#else
+ for (ii = 0; ii + RW_BATCH <= n; ii += RW_BATCH) {
+ p = (void *) &(SA[ii]);
+ FWRITE_UINTS(p,RW_BATCH,fp);
+ }
+ if (ii <= n) {
+ p = (void *) &(SA[ii]);
+ FWRITE_UINTS(p,n - ii + 1,fp);
+ }
+#endif
fclose(fp);
}
@@ -466,7 +492,8 @@ Sarray_write_index_separate (char *indexiptrsfile, char *indexicompfile, char *i
char *sarrayfile, Genome_T genomecomp, UINT4 genomelength, bool compressp,
char chartable[]) {
UINT4 n = genomelength;
- Oligospace_T oligospace, prev_oligospace, noccupied, prev_noccupied;
+ Oligospace_T oligospace, prev_oligospace, noccupied;
+ /* Oligospace_T prev_noccupied; */
Sarrayptr_T *saindexi_new, *saindexj_new, *saindexi_old, *saindexj_old;
UINT4 *SA;
int sa_fd;
@@ -481,7 +508,7 @@ Sarray_write_index_separate (char *indexiptrsfile, char *indexicompfile, char *i
oligospace = power(4,/*querylength*/indexsize);
saindexi_old = (Sarrayptr_T *) CALLOC(oligospace,sizeof(Sarrayptr_T));
saindexj_old = (Sarrayptr_T *) CALLOC(oligospace,sizeof(Sarrayptr_T));
- prev_noccupied = 0;
+ /* prev_noccupied = 0; */
noccupied = make_index_separate(saindexi_old,saindexj_old,
oligospace,/*querylength*/indexsize,genomecomp,SA,n,chartable);
fprintf(stderr,"For indexsize %d, occupied %u/%u\n",indexsize,noccupied,oligospace);
@@ -570,7 +597,8 @@ Sarray_write_index_interleaved (char *indexptrsfile, char *indexcompfile,
char *sarrayfile, Genome_T genomecomp, UINT4 genomelength, bool compressp,
char chartable[]) {
UINT4 n = genomelength;
- Oligospace_T oligospace, prev_oligospace, noccupied, prev_noccupied;
+ Oligospace_T oligospace, prev_oligospace, noccupied;
+ /* Oligospace_T prev_noccupied; */
Sarrayptr_T *saindex_new, *saindex_old;
UINT4 *SA;
int sa_fd;
@@ -584,7 +612,7 @@ Sarray_write_index_interleaved (char *indexptrsfile, char *indexcompfile,
indexsize = MIN_INDEXSIZE;
oligospace = power(4,/*querylength*/indexsize);
saindex_old = (Sarrayptr_T *) CALLOC(2*oligospace,sizeof(Sarrayptr_T));
- prev_noccupied = 0;
+ /* prev_noccupied = 0; */
noccupied = make_index_interleaved(saindex_old,
oligospace,/*querylength*/indexsize,genomecomp,SA,n,chartable);
fprintf(stderr,"For indexsize %d, occupied %u/%u\n",indexsize,noccupied,oligospace);
@@ -661,6 +689,149 @@ Sarray_write_index_interleaved (char *indexptrsfile, char *indexcompfile,
}
+/* phi is the successor array: [0..n] */
+void
+Sarray_write_csa (char **csaptrfiles, char **csacompfiles, char *sasampleqfile, char *sasamplesfile, char *saindex0file,
+ char *sarrayfile, char *rankfile, Genome_T genomecomp, UINT4 genomelength, char chartable[]) {
+ UINT4 *CSA, *SA, *SA_inv, sa_i;
+ FILE *fp, *sa_fp, *samples_fp;
+ /* FILE *csa_fp; */
+ UINT4 n = genomelength, n_plus_one, ii, i, b;
+ int chari, k;
+ Sarrayptr_T saindexi[5], saindexj[5], saindexn, indexX;
+ int sa_fd, rank_fd;
+ size_t sa_len, rank_len;
+ int indexsize;
+ UINT4 *read_buffer, *write_buffer, ignore;
+ char *queryuc_ptr;
+ int csa_sampling;
+
+ /* Write SA sampling interval */
+ fp = fopen(sasampleqfile,"wb");
+ csa_sampling = rint(log((double) genomelength)/log(2.0));
+ fprintf(stderr,"CSA sampling: %d\n",csa_sampling);
+ FWRITE_INT(csa_sampling,fp);
+ fclose(fp);
+
+
+ /* Determine sizes of each csa */
+ SA = (UINT4 *) Access_mmap(&sa_fd,&sa_len,sarrayfile,sizeof(UINT4),/*randomp*/true);
+ queryuc_ptr = (char *) CALLOC(/*querylength*/1+1,sizeof(char));
+
+ /* A */
+ oligo_nt(queryuc_ptr,/*oligo*/0,/*querylength*/1);
+ sarray_search_simple(&(saindexi[0]),&(saindexj[0]),queryuc_ptr,/*querylength*/1,
+ genomecomp,SA,/*i*/1,/*j*/n,n,chartable);
+ printf("A: %u..%u\n",saindexi[0],saindexj[0]);
+
+ /* C */
+ oligo_nt(queryuc_ptr,/*oligo*/1,/*querylength*/1);
+ sarray_search_simple(&(saindexi[1]),&(saindexj[1]),queryuc_ptr,/*querylength*/1,
+ genomecomp,SA,/*i*/1,/*j*/n,n,chartable);
+ printf("C: %u..%u\n",saindexi[1],saindexj[1]);
+
+ /* G */
+ oligo_nt(queryuc_ptr,/*oligo*/2,/*querylength*/1);
+ sarray_search_simple(&(saindexi)[2],&(saindexj[2]),queryuc_ptr,/*querylength*/1,
+ genomecomp,SA,/*i*/1,/*j*/n,n,chartable);
+ printf("G: %u..%u\n",saindexi[2],saindexj[2]);
+
+ /* T */
+ oligo_nt(queryuc_ptr,/*oligo*/3,/*querylength*/1);
+ sarray_search_simple(&(saindexi[3]),&(saindexj[3]),queryuc_ptr,/*querylength*/1,
+ genomecomp,SA,/*i*/1,/*j*/n,n,chartable);
+ printf("T: %u..%u\n",saindexi[3],saindexj[3]);
+
+ /* X */
+ saindexi[4] = saindexj[3] + 1;
+ saindexj[4] = genomelength;
+ printf("X: %u..%u\n",saindexi[4],saindexj[4]);
+
+ munmap((void *) SA,sa_len);
+ close(sa_fd);
+
+ fp = fopen(saindex0file,"wb");
+ FWRITE_UINT(saindexi[0],fp);
+ FWRITE_UINT(saindexi[1],fp);
+ FWRITE_UINT(saindexi[2],fp);
+ FWRITE_UINT(saindexi[3],fp);
+ FWRITE_UINT(saindexi[4],fp);
+
+ n_plus_one = genomelength + 1;
+ FWRITE_UINT(n_plus_one,fp); /* Needed by sarray-read to find genomiclength */
+
+ fclose(fp);
+
+
+ /* Process suffix array */
+ read_buffer = (UINT4 *) MALLOC(RW_BATCH * sizeof(UINT4));
+
+ SA_inv = (UINT4 *) Access_mmap(&rank_fd,&rank_len,rankfile,sizeof(UINT4),/*randomp*/true);
+ /* csa_fp = fopen(csafile,"wb");*/
+ sa_fp = fopen(sarrayfile,"rb");
+ samples_fp = fopen(sasamplesfile,"wb");
+
+ CSA = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
+
+ /* Ignore csa[0] which corresponds to end-of-string terminator */
+ FREAD_UINT(&sa_i,sa_fp);
+ FWRITE_UINT(sa_i,samples_fp);
+ CSA[0] = genomelength;
+ /* FWRITE_UINT(CSA[0],csa_fp); */
+
+ ii = 1;
+ while (ii + RW_BATCH <= n) {
+ FREAD_UINTS(read_buffer,RW_BATCH,sa_fp);
+ for (b = 0, i = ii; b < RW_BATCH; b++, i++) {
+ if ((i % csa_sampling) == 0) {
+ FWRITE_UINT(read_buffer[b],samples_fp);
+ }
+ CSA[i] = SA_inv[read_buffer[b] + 1];
+ }
+ /* FWRITE_UINTS(&(CSA[ii]),RW_BATCH,csa_fp); */
+ ii += RW_BATCH;
+ }
+
+ /* Final partial batch */
+ for (i = ii; i <= n; i++) { /* final partial batch */
+ FREAD_UINT(&sa_i,sa_fp);
+ if ((i % csa_sampling) == 0) {
+ FWRITE_UINT(sa_i,samples_fp);
+ }
+ CSA[i] = SA_inv[sa_i + 1];
+ /* FWRITE_UINT(CSA[i],csa_fp);*/
+ }
+ /* fclose(csa_fp); */
+
+ fclose(samples_fp);
+ fclose(sa_fp);
+ munmap((void *) SA_inv,rank_len);
+ close(rank_fd);
+ FREE(read_buffer);
+
+ for (chari = 0; chari < 5; chari++) {
+ if (saindexj[chari] < saindexi[chari]) {
+ fp = fopen(csaptrfiles[chari],"wb");
+ fclose(fp);
+ fp = fopen(csacompfiles[chari],"wb");
+ fclose(fp);
+ } else {
+ saindexn = saindexj[chari] - saindexi[chari] + 1;
+ /* Provide (n-1) to write values [0..n] */
+ Bitpack64_write_differential(csaptrfiles[chari],csacompfiles[chari],
+ &(CSA[saindexi[chari]]),saindexn-1);
+ }
+ }
+
+ fprintf(stderr,"done\n");
+
+ FREE(CSA);
+
+ return;
+}
+
+
+
#if 0
UINT4 *
Sarray_compute_lcp_kasai (UINT4 *SA, UINT4 n) {
@@ -724,6 +895,7 @@ Sarray_compute_lcp_kasai (UINT4 *SA, UINT4 n) {
#if 0
/* Puts rank in file, to save on memory */
+/* Rank file contains the inverse suffix array, needed to compute the compressed suffix array */
UINT4 *
Sarray_compute_lcp (char *rankfile, UINT4 *SA, UINT4 n) {
UINT4 *lcp;
@@ -790,9 +962,8 @@ Sarray_compute_lcp (char *rankfile, UINT4 *SA, UINT4 n) {
#endif
-#define RW_BATCH 10000000 /* 10 million elements */
-
/* Puts rank and permuted suffix array in file, to save on memory even further */
+/* Rank file is the same as the inverted suffix array, needed to compute the compressed suffix array */
UINT4 *
Sarray_compute_lcp (char *rankfile, char *permuted_sarray_file, char *sarrayfile, UINT4 n) {
UINT4 *lcp;
@@ -935,7 +1106,9 @@ Sarray_compute_lcp (char *rankfile, char *permuted_sarray_file, char *sarrayfile
FREE(read_buffer_1);
remove(permuted_sarray_file);
+#ifndef USE_CSA
remove(rankfile);
+#endif
return lcp;
}
@@ -1044,7 +1217,7 @@ Sarray_compute_lcp_from_genome (UINT4 *SA, unsigned char *gbuffer, UINT4 n) {
UINT4 *rank, h;
UINT4 i, j;
char *comma;
- UINT4 horig;
+ /* UINT4 horig; */
lcp = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
@@ -1058,7 +1231,7 @@ Sarray_compute_lcp_from_genome (UINT4 *SA, unsigned char *gbuffer, UINT4 n) {
for (i = 0; i <= n; i++) {
if (rank[i] > 0) {
j = SA[rank[i] - 1];
- horig = h;
+ /* horig = h; */
while (i + h < n && j + h < n && gbuffer[i+h] == gbuffer[j+h]) {
h++;
}
@@ -2039,6 +2212,7 @@ Sarray_array_uncompress (Genome_T genomecomp, char *sarrayfile, char *plcpptrsfi
UINT4 n = genomelength, pos, match, h;
unsigned char *gbuffer;
+ int shmid;
UINT4 *SA, *plcpptrs, *plcpcomp;
int sa_fd, plcpcomp_fd;
@@ -2058,7 +2232,7 @@ Sarray_array_uncompress (Genome_T genomecomp, char *sarrayfile, char *plcpptrsfi
}
SA = (UINT4 *) Access_mmap(&sa_fd,&sa_len,sarrayfile,sizeof(UINT4),/*randomp*/false);
- plcpptrs = (UINT4 *) Access_allocated(&plcpptrs_len,&seconds,plcpptrsfile,sizeof(UINT4));
+ plcpptrs = (UINT4 *) Access_allocate(&shmid,&plcpptrs_len,&seconds,plcpptrsfile,sizeof(UINT4),/*sharedp*/false);
plcpcomp = (UINT4 *) Access_mmap(&plcpcomp_fd,&plcpcomp_len,plcpcompfile,sizeof(UINT4),
/*randomp*/true);
plcpcomp = (UINT4 *) Access_mmap(&plcpcomp_fd,&plcpcomp_len,plcpcompfile,sizeof(UINT4),
diff --git a/src/sarray-write.h b/src/sarray-write.h
index 64aa3bf..75ecf94 100644
--- a/src/sarray-write.h
+++ b/src/sarray-write.h
@@ -1,4 +1,4 @@
-/* $Id: sarray-write.h 140511 2014-07-03 01:50:36Z twu $ */
+/* $Id: sarray-write.h 165971 2015-05-20 00:20:26Z twu $ */
#ifndef SARRAY_WRITE_INCLUDED
#define SARRAY_WRITE_INCLUDED
#include "types.h"
@@ -20,6 +20,10 @@ Sarray_write_index_interleaved (char *indexptrsfile, char *indexcompfile,
char *sarrayfile, Genome_T genomecomp, UINT4 genomelength, bool compressp,
char chartable[]);
+extern void
+Sarray_write_csa (char **csaptrfiles, char **csacompfiles, char *sasampleqfile, char *sasamplesfile, char *saindex0file,
+ char *sarrayfile, char *rankfile, Genome_T genomecomp, UINT4 genomelength, char chartable[]);
+
extern UINT4 *
Sarray_compute_lcp (char *rankfile, char *permuted_sarray_file, char *sarrayfile, UINT4 n);
extern UINT4 *
diff --git a/src/segmentpos.c b/src/segmentpos.c
index 5c92b66..3c99a99 100644
--- a/src/segmentpos.c
+++ b/src/segmentpos.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: segmentpos.c 138719 2014-06-11 17:07:13Z twu $";
+static char rcsid[] = "$Id: segmentpos.c 155282 2014-12-12 19:42:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -86,8 +86,8 @@ Segmentpos_free (T *old) {
}
void
-Segmentpos_print (FILE *fp, T this, char *acc, Univcoord_T chroffset) {
- fprintf(fp,"%s\t%u\t%s\t%u\t%u\n",acc,chroffset+this->chrpos1,Chrom_string(this->chrom),this->chrpos1,this->length);
+Segmentpos_print (Filestring_T fp, T this, char *acc, Univcoord_T chroffset) {
+ FPRINTF(fp,"%s\t%u\t%s\t%u\t%u\n",acc,chroffset+this->chrpos1,Chrom_string(this->chrom),this->chrpos1,this->length);
return;
}
@@ -227,7 +227,7 @@ contig_print_p (Univ_IIT_T contig_iit, int contig_straintype, bool referencealig
void
-Segmentpos_print_accessions (FILE *fp, Univ_IIT_T contig_iit, Univcoord_T position1,
+Segmentpos_print_accessions (Filestring_T fp, Univ_IIT_T contig_iit, Univcoord_T position1,
Univcoord_T position2, bool referencealignp,
char *align_strain) {
Univcoord_T contig_start;
@@ -239,7 +239,7 @@ Segmentpos_print_accessions (FILE *fp, Univ_IIT_T contig_iit, Univcoord_T positi
Univinterval_T interval;
bool printreferencep, printaltp, firstprintp = false, allocp;
- fprintf(fp," Accessions: ");
+ FPRINTF(fp," Accessions: ");
indices = Univ_IIT_get(&nindices,contig_iit,position1,position2);
if (referencealignp == true) {
@@ -276,39 +276,26 @@ Segmentpos_print_accessions (FILE *fp, Univ_IIT_T contig_iit, Univcoord_T positi
comma2 = Genomicpos_commafmt((Univcoord_T) (relend + ONEBASEDP));
if (firstprintp == true) {
- printf("; ");
+ FPRINTF(fp,"; ");
} else {
firstprintp = true;
}
-#if 0
- if (IIT_version(contig_iit) <= 1) {
- firstchar = IIT_annotation_firstchar(contig_iit,index);
- if (firstchar == '-') {
- printf("[-]");
- }
- } else {
- if (Interval_sign(interval) < 0) {
- printf("[-]");
- }
- }
-#else
firstchar = Univ_IIT_annotation_firstchar(contig_iit,index);
if (firstchar == '-') {
- printf("[-]");
+ FPRINTF(fp,"[-]");
}
-#endif
label = Univ_IIT_label(contig_iit,index,&allocp);
- fprintf(fp,"%s",label);
+ FPRINTF(fp,"%s",label);
if (allocp == true) {
FREE(label);
}
if (referencealignp == false && contig_straintype == 0) {
- fprintf(fp,"[reference strain]");
+ FPRINTF(fp,"[reference strain]");
}
- fprintf(fp,":%s%s%s (out of %u bp)",comma1,SEPARATOR,comma2,contig_length);
+ FPRINTF(fp,":%s%s%s (out of %u bp)",comma1,SEPARATOR,comma2,contig_length);
FREE(comma2);
FREE(comma1);
@@ -317,7 +304,7 @@ Segmentpos_print_accessions (FILE *fp, Univ_IIT_T contig_iit, Univcoord_T positi
}
j++;
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
if (indices != NULL) {
FREE(indices);
diff --git a/src/segmentpos.h b/src/segmentpos.h
index 162a224..5de4638 100644
--- a/src/segmentpos.h
+++ b/src/segmentpos.h
@@ -1,4 +1,4 @@
-/* $Id: segmentpos.h 138719 2014-06-11 17:07:13Z twu $ */
+/* $Id: segmentpos.h 155282 2014-12-12 19:42:54Z twu $ */
#ifndef SEGMENTPOS_INCLUDED
#define SEGMENTPOS_INCLUDED
#include <stdio.h>
@@ -7,6 +7,7 @@
#include "types.h"
#include "chrom.h"
#include "iit-read-univ.h"
+#include "filestring.h"
#define T Segmentpos_T
typedef struct T *T;
@@ -29,7 +30,7 @@ Segmentpos_new (Chrom_T chrom, Chrpos_T chrpos1, Chrpos_T chrpos2,
extern void
Segmentpos_free (T *old);
extern void
-Segmentpos_print (FILE *fp, T this, char *acc, Univcoord_T chroffset);
+Segmentpos_print (Filestring_T fp, T this, char *acc, Univcoord_T chroffset);
extern int
Segmentpos_compare_alpha (const void *x, const void *y);
extern int
@@ -40,7 +41,7 @@ extern int
Segmentpos_compare_order (const void *x, const void *y);
extern void
-Segmentpos_print_accessions (FILE *fp, Univ_IIT_T contig_iit, Univcoord_T position1,
+Segmentpos_print_accessions (Filestring_T fp, Univ_IIT_T contig_iit, Univcoord_T position1,
Univcoord_T position2, bool referencealignp,
char *align_strain);
diff --git a/src/sequence.c b/src/sequence.c
index 3bb1be1..f049b51 100644
--- a/src/sequence.c
+++ b/src/sequence.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sequence.c 132731 2014-04-08 21:19:57Z twu $";
+static char rcsid[] = "$Id: sequence.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -58,8 +58,6 @@ static char rcsid[] = "$Id: sequence.c 132731 2014-04-08 21:19:57Z twu $";
* ^trimstart
* ^contents
*
- * Trimming is determined by Oligoindex_set_inquery(), based on
- * finding unique 8-mers on each end.
************************************************************************/
@@ -1203,7 +1201,7 @@ Sequence_endstream () {
T
-Sequence_read (int *nextchar, FILE *input, bool maponlyp) {
+Sequence_read (int *nextchar, FILE *input) {
T new;
int fulllength, skiplength;
char *pointer1, *pointer2a, *pointer2b;
@@ -1253,11 +1251,9 @@ Sequence_read (int *nextchar, FILE *input, bool maponlyp) {
}
if (skiplength > 0) {
- if (maponlyp == false) {
- fprintf(stderr,"Warning: cDNA sequence length of %d exceeds maximum length of %d. Truncating %d chars in middle.\n",
- fulllength+skiplength,MAXSEQLEN,skiplength);
- fprintf(stderr," (For long sequences, perhaps you want maponly mode, by providing the '-1' flag.)\n");
- }
+ fprintf(stderr,"Warning: cDNA sequence length of %d exceeds maximum length of %d. Truncating %d chars in middle.\n",
+ fulllength+skiplength,MAXSEQLEN,skiplength);
+ fprintf(stderr," (For long sequences, perhaps you want maponly mode, by providing the '-1' flag.)\n");
}
#ifdef PMAP
@@ -1342,7 +1338,7 @@ Sequence_read (int *nextchar, FILE *input, bool maponlyp) {
T
-Sequence_read_multifile (int *nextchar, FILE **input, char ***files, int *nfiles, bool maponlyp) {
+Sequence_read_multifile (int *nextchar, FILE **input, char ***files, int *nfiles) {
T queryseq;
while (1) {
@@ -1371,7 +1367,7 @@ Sequence_read_multifile (int *nextchar, FILE **input, char ***files, int *nfiles
}
}
}
- if ((queryseq = Sequence_read(&(*nextchar),*input,maponlyp)) != NULL) {
+ if ((queryseq = Sequence_read(&(*nextchar),*input)) != NULL) {
return queryseq;
}
}
@@ -1473,8 +1469,9 @@ Sequence_read_unlimited (int *nextchar, FILE *input) {
}
}
+
void
-Sequence_print_digest (FILE *fp, T this) {
+Sequence_print_digest (Filestring_T fp, T this) {
unsigned char *digest;
digest = MD5_compute((unsigned char *) this->contents,this->fulllength);
@@ -1485,30 +1482,32 @@ Sequence_print_digest (FILE *fp, T this) {
/* Calling procedure needs to print the initial ">", if desired */
void
-Sequence_print_header (FILE *fp, T this, bool checksump) {
+Sequence_print_header (Filestring_T fp, T this, bool checksump) {
if (this->acc == NULL) {
- fprintf(fp,"NO_HEADER");
+ FPRINTF(fp,"NO_HEADER");
} else {
if (this->restofheader == NULL || this->restofheader[0] == '\0') {
- fprintf(fp,"%s",this->acc);
+ FPRINTF(fp,"%s",this->acc);
} else {
- fprintf(fp,"%s %s",this->acc,this->restofheader);
+ FPRINTF(fp,"%s %s",this->acc,this->restofheader);
}
if (checksump == true) {
- fprintf(fp," md5:");
+ FPRINTF(fp," md5:");
Sequence_print_digest(fp,this);
}
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
+#if 0
+/* Used by revcomp.c */
void
-Sequence_print_header_revcomp (T this) {
+Sequence_stdout_header_revcomp (T this) {
if (this->restofheader == NULL || this->restofheader[0] == '\0') {
printf(">%s",this->acc);
} else {
@@ -1518,11 +1517,47 @@ Sequence_print_header_revcomp (T this) {
printf("\n");
return;
}
+#endif
+
+
+void
+Sequence_print (Filestring_T fp, T this, bool uppercasep, int wraplength, bool trimmedp) {
+ int i = 0, pos, start, end;
+ char uppercaseCode[128] = UPPERCASE_STD;
+
+ if (trimmedp == true) {
+ start = this->trimstart;
+ end = this->trimend;
+ } else {
+ start = 0;
+ end = this->fulllength;
+ }
+ if (uppercasep == true) {
+ for (pos = start; pos < end; pos++, i++) {
+ PUTC(uppercaseCode[(int) this->contents[i]],fp);
+ if ((i+1) % wraplength == 0) {
+ PUTC('\n',fp);
+ }
+ }
+ } else {
+ for (pos = start; pos < end; pos++, i++) {
+ PUTC(this->contents[i],fp);
+ if ((i+1) % wraplength == 0) {
+ PUTC('\n',fp);
+ }
+ }
+ }
+ if (i % wraplength != 0) {
+ PUTC('\n',fp);
+ }
+
+ return;
+}
void
-Sequence_print (FILE *fp, T this, bool uppercasep, int wraplength, bool trimmedp) {
+Sequence_stdout (T this, bool uppercasep, int wraplength, bool trimmedp) {
int i = 0, pos, start, end;
char uppercaseCode[128] = UPPERCASE_STD;
@@ -1536,28 +1571,29 @@ Sequence_print (FILE *fp, T this, bool uppercasep, int wraplength, bool trimmedp
if (uppercasep == true) {
for (pos = start; pos < end; pos++, i++) {
- putc(uppercaseCode[(int) this->contents[i]],fp);
+ putchar(uppercaseCode[(int) this->contents[i]]);
if ((i+1) % wraplength == 0) {
- putc('\n',fp);
+ putchar('\n');
}
}
} else {
for (pos = start; pos < end; pos++, i++) {
- putc(this->contents[i],fp);
+ putchar(this->contents[i]);
if ((i+1) % wraplength == 0) {
- putc('\n',fp);
+ putchar('\n');
}
}
}
if (i % wraplength != 0) {
- putc('\n',fp);
+ putchar('\n');
}
+
return;
}
void
-Sequence_print_alt (T ref, T alt, T snp, bool uppercasep, int wraplength) {
+Sequence_stdout_alt (T ref, T alt, T snp, bool uppercasep, int wraplength) {
int i = 0, pos, start, end;
char uppercaseCode[128] = UPPERCASE_STD;
@@ -1603,7 +1639,7 @@ Sequence_print_alt (T ref, T alt, T snp, bool uppercasep, int wraplength) {
void
-Sequence_print_two (T ref, T alt, bool uppercasep, int wraplength) {
+Sequence_stdout_two (T ref, T alt, bool uppercasep, int wraplength) {
int i = 0, pos, pos2, startpos, end;
char uppercaseCode[128] = UPPERCASE_STD;
@@ -1693,7 +1729,7 @@ Sequence_print_two (T ref, T alt, bool uppercasep, int wraplength) {
void
-Sequence_print_raw (T this) {
+Sequence_stdout_raw (T this) {
int i = 0, pos, start, end;
start = 0;
diff --git a/src/sequence.h b/src/sequence.h
index 45108f0..9ba3fbc 100644
--- a/src/sequence.h
+++ b/src/sequence.h
@@ -1,13 +1,13 @@
-/* $Id: sequence.h 157232 2015-01-22 18:55:31Z twu $ */
+/* $Id: sequence.h 157225 2015-01-22 18:47:23Z twu $ */
#ifndef SEQUENCE_INCLUDED
#define SEQUENCE_INCLUDED
-
#ifdef HAVE_CONFIG_H
#include <config.h> /* For HAVE_ZLIB, HAVE_BZLIB */
#endif
#include <stdio.h>
#include "bool.h"
+#include "filestring.h"
#ifdef HAVE_ZLIB
#include <zlib.h>
@@ -82,13 +82,13 @@ Sequence_free (T *old);
extern T
Sequence_genomic_new (char *contents, int length, bool copyp);
extern T
-Sequence_read (int *nextchar, FILE *input, bool maponlyp);
+Sequence_read (int *nextchar, FILE *input);
extern T
-Sequence_read_multifile (int *nextchar, FILE **input, char ***files, int *nfiles, bool maponlyp);
-
-
+Sequence_read_multifile (int *nextchar, FILE **input, char ***files, int *nfiles);
extern T
Sequence_read_unlimited (int *nextchar, FILE *input);
+
+
#ifdef PMAP
extern char
Sequence_codon_char (char aa, int codonpos);
@@ -109,26 +109,22 @@ Sequence_alias (T this);
extern void
-Sequence_print_digest (FILE *fp, T this);
+Sequence_print_digest (Filestring_T fp, T this);
extern void
-Sequence_print_header (FILE *fp, T this, bool checksump);
-extern void
-Sequence_print_header_revcomp (T this);
+Sequence_print_header (Filestring_T fp, T this, bool checksump);
extern void
-Sequence_print (FILE *fp, T this, bool uppercasep, int wraplength, bool trimmedp);
-extern void
-Sequence_print_alt (T ref, T alt, T snp, bool uppercasep, int wraplength);
-extern void
-Sequence_print_two (T ref, T alt, bool uppercasep, int wraplength);
+Sequence_print (Filestring_T fp, T this, bool uppercasep, int wraplength, bool trimmedp);
extern void
-Sequence_print_oneline (FILE *fp, T this);
+Sequence_stdout (T this, bool uppercasep, int wraplength, bool trimmedp);
+extern void
+Sequence_stdout_alt (T ref, T alt, T snp, bool uppercasep, int wraplength);
extern void
-Sequence_print_oneline_revcomp (FILE *fp, T this);
+Sequence_stdout_two (T ref, T alt, bool uppercasep, int wraplength);
extern void
-Sequence_print_raw (T this);
+Sequence_stdout_raw (T this);
extern T
Sequence_substring (T usersegment, unsigned int left, unsigned int length,
diff --git a/src/shortread.c b/src/shortread.c
index 6db232e..10f624c 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 160875 2015-03-13 00:26:46Z twu $";
+static char rcsid[] = "$Id: shortread.c 160871 2015-03-13 00:14:41Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -30,6 +30,7 @@ static char rcsid[] = "$Id: shortread.c 160875 2015-03-13 00:26:46Z twu $";
#include "assert.h"
#include "mem.h"
+#include "access.h"
#include "complement.h"
#include "intlist.h"
#include "fopen.h"
@@ -55,6 +56,13 @@ static char rcsid[] = "$Id: shortread.c 160875 2015-03-13 00:26:46Z twu $";
#define debug2(x)
#endif
+/* File open/close. Want to turn on in gsnap.c also. */
+#ifdef DEBUGF
+#define debugf(x) x
+#else
+#define debugf(x)
+#endif
+
/***********************************************************************
@@ -206,15 +214,239 @@ static char Quality[MAX_READLENGTH+1];
*/
+#if 0
+
+static char *
+mpi_fgets (char *s, int size, MPI_File stream) {
+ char *orig = s;
+ MPI_Status status;
+
+ if (--size > 0) {
+ MPI_File_read(stream,s,1,MPI_CHAR,&status);
+ if (*s == '\0') {
+ return (char *) NULL;
+ } else if (*s == '\n') {
+ *++s = '\0';
+ return orig;
+ } else {
+ s++;
+ }
+ }
+
+ while (--size > 0) { /* read at most size-1 characters */
+ MPI_File_read(stream,s,1,MPI_CHAR,&status);
+ if (*s == '\0') {
+ return orig;
+ } else if (*s == '\n') {
+ *++s = '\0';
+ return orig;
+ } else {
+ s++;
+ }
+ }
+
+ *s = '\0';
+ return orig;
+}
+#endif
+
+
+#ifdef USE_MPI
+static char *
+fgets_filecontents (char *s, int size, char **filecontents) {
+ char *orig = s;
+
+ if (--size > 0) {
+ *s = *(*filecontents)++;
+ if (*s == '\0') {
+ return (char *) NULL;
+ } else if (*s == '\n') {
+ *++s = '\0';
+ return orig;
+ } else {
+ s++;
+ }
+ }
+
+ while (--size > 0) { /* read at most size-1 characters */
+ *s = *(*filecontents)++;
+ if (*s == '\0') {
+ return orig;
+ } else if (*s == '\n') {
+ *++s = '\0';
+ return orig;
+ } else {
+ s++;
+ }
+ }
+
+ *s = '\0';
+ return orig;
+}
+#endif
+
+
+
+#if 0
+off_t
+Shortread_find_read (FILE *fp) {
+ off_t scan = 0;
+ int c;
+
+ while ((c = fgetc(fp)) != EOF) {
+ scan += 1;
+ if (c == '\n') {
+ if ((c = fgetc(fp)) == EOF) {
+ return scan;
+ } else if (c == '>') {
+ return scan;
+ } else if (c == '@') {
+ return scan;
+ } else {
+ scan += 1;
+ }
+ }
+ }
+
+ return scan;
+}
+
+
+unsigned long long **
+Shortread_input_divide (bool *fastq_format_p, char **files, int nfiles, int naliquots) {
+ unsigned long long **aliquots, ptr, filesize1, filesize2, filestep1, filestep2;
+ FILE *input1, *input2;
+ int c;
+ int workeri, filei;
+
+ Header[0] = '\0';
+
+ aliquots = (unsigned long long **) CALLOC(naliquots+1,sizeof(unsigned long long *));
+ for (workeri = 0; workeri <= naliquots; workeri++) {
+ aliquots[workeri] = (unsigned long long *) CALLOC(nfiles,sizeof(unsigned long long));
+ }
+
+
+ filei = 0;
+ while (filei < nfiles) {
+ filesize1 = Access_filesize(files[filei]);
+ filestep1 = filesize1/naliquots;
+ if ((input1 = FOPEN_READ_TEXT(files[filei])) == NULL) {
+ fprintf(stderr,"Cannot open file %s\n",files[filei]);
+ exit(9);
+ } else {
+ if ((c = fgetc(input1)) == EOF) {
+ abort();
+ } else if (c == '>') {
+ *fastq_format_p = false;
+ } else if (c == '@') {
+ *fastq_format_p = true;
+ } else {
+ fprintf(stderr,"Expected file to start with > or @\n");
+ exit(9);
+ }
+
+ workeri = 0;
+ aliquots[workeri][filei] = 0;
+
+ for (workeri = 1; workeri < naliquots; workeri++) {
+ ptr = filestep1 * workeri;
+ fseek(input1,ptr,SEEK_SET);
+ aliquots[workeri][filei] = ptr + Shortread_find_read(input1);
+ fprintf(stderr,"%u => %u\n",ptr,aliquots[workeri][filei]);
+ }
+
+ aliquots[naliquots][filei] = filesize1;
+ }
+
+ filei++;
+ }
+
+ return aliquots;
+}
+#endif
+
+
/* Returns '>' if FASTA file, first sequence char if not */
+#if 0 /* was defined(USE_MPI) && defined(USE_MPI_FILE_INPUT), but not needed */
+/* Needed by master in MPI version */
+static int
+mpi_fgetc (MPI_File stream) {
+ char buffer[1];
+ MPI_Status status;
+
+ MPI_File_read(stream,buffer,1,MPI_CHAR,&status);
+ return buffer[0];
+}
+
+int
+Shortread_input_init_mpi_file (int *nchars, MPI_File fp) {
+ int c;
+ bool okayp = false;
+
+ Header[0] = '\0';
+
+ while (okayp == false && (c = mpi_fgetc(fp)) != '\0') {
+ *nchars += 1;
+ debug(printf("nchars %d: Read character %c\n",*nchars,c));
+ if (iscntrl(c)) {
+#ifdef DASH
+ } else if (c == DASH) {
+#endif
+ } else if (isspace(c)) {
+ } else {
+ okayp = true;
+ }
+ }
+ if (okayp == false) {
+ *nchars += 1;
+ }
+
+ debug(printf("nchars %d: Returning initial character %c\n",*nchars,c));
+ return c;
+}
+#endif
+
+
int
-Shortread_input_init (FILE *fp) {
+Shortread_input_init (int *nchars, FILE *fp) {
int c;
bool okayp = false;
Header[0] = '\0';
while (okayp == false && (c = fgetc(fp)) != EOF) {
+ *nchars += 1;
+ debug(printf("nchars %d: Read character %c\n",*nchars,c));
+ if (iscntrl(c)) {
+#ifdef DASH
+ } else if (c == DASH) {
+#endif
+ } else if (isspace(c)) {
+ } else {
+ okayp = true;
+ }
+ }
+ if (okayp == false) {
+ *nchars += 1;
+ }
+
+ debug(printf("nchars %d: Returning initial character %c\n",*nchars,c));
+ return c;
+}
+
+
+
+#ifdef USE_MPI
+/* Returns '>' if FASTA file, first sequence char if not */
+static int
+Shortread_input_init_filecontents (char **filecontents) {
+ int c;
+ bool okayp = false;
+
+ Header[0] = '\0';
+
+ while (okayp == false && (c = *(*filecontents)++) != EOF && c != '\0') {
debug(printf("Read character %c\n",c));
if (iscntrl(c)) {
#ifdef DASH
@@ -229,6 +461,8 @@ Shortread_input_init (FILE *fp) {
debug(printf("Returning initial character %c\n",c));
return c;
}
+#endif
+
#ifdef HAVE_ZLIB
@@ -289,16 +523,25 @@ static int acc_fieldi_end = 0;
static bool force_single_end_p = false;
static bool filter_chastity_p = true;
static bool allow_paired_end_mismatch_p = false;
+static bool fastq_format_p;
+static int barcode_length;
+static bool invert_first_p;
+static bool invert_second_p;
void
Shortread_setup (int acc_fieldi_start_in, int acc_fieldi_end_in,
bool force_single_end_p_in, bool filter_chastity_p_in,
- bool allow_paired_end_mismatch_p_in) {
+ bool allow_paired_end_mismatch_p_in, bool fastq_format_p_in,
+ int barcode_length_in, bool invert_first_p_in, bool invert_second_p_in) {
acc_fieldi_start = acc_fieldi_start_in;
acc_fieldi_end = acc_fieldi_end_in;
force_single_end_p = force_single_end_p_in;
filter_chastity_p = filter_chastity_p_in;
allow_paired_end_mismatch_p = allow_paired_end_mismatch_p_in;
+ fastq_format_p = fastq_format_p_in;
+ barcode_length = barcode_length_in;
+ invert_first_p = invert_first_p_in;
+ invert_second_p = invert_second_p_in;
return;
}
@@ -307,15 +550,93 @@ Shortread_setup (int acc_fieldi_start_in, int acc_fieldi_end_in,
static char *skipped_acc = "Skipped";
static char *
-input_header (bool *filterp, char **restofheader, FILE *fp, bool skipp) {
+input_header (int *nchars, bool *filterp, char **restofheader, int nextchar,
+ FILE *fp, bool skipp) {
+ char *acc = NULL, *p, *q;
+ size_t length;
+
+ *filterp = false;
+
+ if (nextchar == EOF) { /* Was feof(fp) */
+ return (char *) NULL;
+ } else if ((p = fgets(&(Header[0]),HEADERLEN,fp)) == NULL) {
+ /* File must terminate after > */
+ return (char *) NULL;
+ } else {
+ *nchars += strlen(p);
+ }
+
+ if (Header[0] == '\n') {
+ Header[0] = '\0';
+ } else if ((p = rindex(&(Header[0]),'\n')) != NULL) {
+ if (p[-1] == '\r') {
+ p--;
+ }
+ *p = '\0';
+ } else {
+ /* Eliminate rest of header from input */
+ while ((p = fgets(&(Discard[0]),DISCARDLEN,fp)) != NULL &&
+ rindex(&(Discard[0]),'\n') == NULL) {
+ *nchars += strlen(p);
+ }
+ }
+
+ if (skipp == true) {
+ return (char *) skipped_acc;
+ } else {
+ p = &(Header[0]);
+ while (*p != '\0' && !isspace((int) *p)) {
+ p++;
+ }
+
+ if (filter_chastity_p == true) {
+ q = p;
+ /* Expecting <read>:<is filtered>:<control number>:<index sequence>, e.g., 1:Y:0:CTTGTA */
+ while (*q != '\0' && *q != ':') {
+ q++;
+ }
+ if (*q != '\0') {
+ q++;
+ if (*q == 'Y') {
+ *filterp = true;
+ }
+ }
+ }
+
+ if (*p == '\0') {
+ /* Accession only */
+ length = (p - &(Header[0]))/sizeof(char);
+ acc = (char *) CALLOC_IN(length+1,sizeof(char));
+ strcpy(acc,Header);
+ (*restofheader) = (char *) CALLOC_IN(1,sizeof(char));
+ (*restofheader)[0] = '\0';
+ } else {
+ *p = '\0';
+ length = (p - &(Header[0]))/sizeof(char);
+ acc = (char *) CALLOC_IN(length+1,sizeof(char));
+ strcpy(acc,Header);
+ p++;
+ *restofheader = (char *) CALLOC_IN(strlen(p)+1,sizeof(char));
+ strcpy(*restofheader,p);
+ }
+
+ return acc;
+ }
+}
+
+
+#ifdef USE_MPI
+static char *
+input_header_filecontents (bool *filterp, char **restofheader, int nextchar,
+ char **filecontents, bool skipp) {
char *acc = NULL, *p, *q;
size_t length;
*filterp = false;
- if (feof(fp)) {
+ if (nextchar == EOF || nextchar == '\0') { /* Was feof(fp) */
return (char *) NULL;
- } else if (fgets(&(Header[0]),HEADERLEN,fp) == NULL) {
+ } else if (fgets_filecontents(&(Header[0]),HEADERLEN,&(*filecontents)) == NULL) {
/* File must terminate after > */
return (char *) NULL;
}
@@ -329,7 +650,7 @@ input_header (bool *filterp, char **restofheader, FILE *fp, bool skipp) {
*p = '\0';
} else {
/* Eliminate rest of header from input */
- while (fgets(&(Discard[0]),DISCARDLEN,fp) != NULL &&
+ while (fgets_filecontents(&(Discard[0]),DISCARDLEN,&(*filecontents)) != NULL &&
rindex(&(Discard[0]),'\n') == NULL) ;
}
@@ -375,21 +696,30 @@ input_header (bool *filterp, char **restofheader, FILE *fp, bool skipp) {
return acc;
}
}
+#endif
#ifdef HAVE_ZLIB
static char *
-input_header_gzip (bool *filterp, char **restofheader, gzFile fp, bool skipp) {
+input_header_gzip (bool *filterp, char **restofheader, int nextchar,
+#ifdef USE_MPI
+ Filestring_T filestring,
+#endif
+ gzFile fp, bool skipp) {
char *acc = NULL, *p, *q;
size_t length;
*filterp = false;
- if (gzeof(fp)) {
+ if (nextchar == EOF) { /* Was gzeof(fp) */
return NULL;
- } else if (gzgets(fp,&(Header[0]),HEADERLEN) == NULL) {
+ } else if ((p = gzgets(fp,&(Header[0]),HEADERLEN)) == NULL) {
/* File must terminate after > */
return NULL;
+#ifdef USE_MPI
+ } else {
+ Filestring_puts(filestring,p,strlen(p));
+#endif
}
if (Header[0] == '\n') {
@@ -401,8 +731,15 @@ input_header_gzip (bool *filterp, char **restofheader, gzFile fp, bool skipp) {
*p = '\0';
} else {
/* Eliminate rest of header from input */
+#ifdef USE_MPI
+ while ((p = gzgets(fp,&(Discard[0]),DISCARDLEN)) != NULL &&
+ rindex(&(Discard[0]),'\n') == NULL) {
+ Filestring_puts(filestring,p,strlen(p));
+ }
+#else
while (gzgets(fp,&(Discard[0]),DISCARDLEN) != NULL &&
rindex(&(Discard[0]),'\n') == NULL) ;
+#endif
}
if (skipp) {
@@ -452,17 +789,25 @@ input_header_gzip (bool *filterp, char **restofheader, gzFile fp, bool skipp) {
#ifdef HAVE_BZLIB
static char *
-input_header_bzip2 (bool *filterp, char **restofheader, Bzip2_T fp, bool skipp) {
+input_header_bzip2 (bool *filterp, char **restofheader, int nextchar,
+#ifdef USE_MPI
+ Filestring_T filestring,
+#endif
+ Bzip2_T fp, bool skipp) {
char *acc = NULL, *p, *q;
size_t length;
*filterp = false;
- if (bzeof(fp)) {
+ if (nextchar == EOF) { /* Was bzeof(fp) */
return NULL;
- } else if (bzgets(fp,&(Header[0]),HEADERLEN) == NULL) {
+ } else if ((p = bzgets(fp,&(Header[0]),HEADERLEN)) == NULL) {
/* File must terminate after > */
return NULL;
+#ifdef USE_MPI
+ } else {
+ Filestring_puts(filestring,p,strlen(p));
+#endif
}
if (Header[0] == '\n') {
@@ -474,8 +819,15 @@ input_header_bzip2 (bool *filterp, char **restofheader, Bzip2_T fp, bool skipp)
*p = '\0';
} else {
/* Eliminate rest of header from input */
+#ifdef USE_MPI
+ while ((p = bzgets(fp,&(Discard[0]),DISCARDLEN)) != NULL &&
+ rindex(&(Discard[0]),'\n') == NULL) {
+ Filestring_puts(filestring,p,strlen(p));
+ }
+#else
while (bzgets(fp,&(Discard[0]),DISCARDLEN) != NULL &&
rindex(&(Discard[0]),'\n') == NULL) ;
+#endif
}
if (skipp) {
@@ -585,18 +937,21 @@ strip_illumina_acc_ending (char *acc1, char *acc2) {
static char *
-input_header_fastq (bool *filterp, char **restofheader, FILE *fp, bool skipp) {
+input_header_fastq (int *nchars, bool *filterp, char **restofheader, int nextchar,
+ FILE *fp, bool skipp) {
char *acc, *p, *q, *start;
size_t length;
int fieldi = 0;
*filterp = false;
- if (feof(fp)) {
+ if (nextchar == EOF) { /* Was feof(fp) */
return NULL;
- } else if (fgets(&(Header[0]),HEADERLEN,fp) == NULL) {
+ } else if ((p = fgets(&(Header[0]),HEADERLEN,fp)) == NULL) {
/* File must terminate after > */
return NULL;
+ } else {
+ *nchars += strlen(p);
}
if (Header[0] == '\n') {
@@ -608,8 +963,10 @@ input_header_fastq (bool *filterp, char **restofheader, FILE *fp, bool skipp) {
*p = '\0';
} else {
/* Eliminate rest of header from input */
- while (fgets(&(Discard[0]),DISCARDLEN,fp) != NULL &&
- rindex(&(Discard[0]),'\n') == NULL) ;
+ while ((p = fgets(&(Discard[0]),DISCARDLEN,fp)) != NULL &&
+ rindex(&(Discard[0]),'\n') == NULL) {
+ *nchars += strlen(p);
+ }
}
if (skipp == true) {
@@ -668,18 +1025,19 @@ input_header_fastq (bool *filterp, char **restofheader, FILE *fp, bool skipp) {
}
-#ifdef HAVE_ZLIB
+#ifdef USE_MPI
static char *
-input_header_fastq_gzip (bool *filterp, char **restofheader, gzFile fp, bool skipp) {
+input_header_fastq_filecontents (bool *filterp, char **restofheader, int nextchar,
+ char **filecontents, bool skipp) {
char *acc, *p, *q, *start;
size_t length;
int fieldi = 0;
*filterp = false;
- if (gzeof(fp)) {
+ if (nextchar == EOF || nextchar == '\0') { /* Was feof(fp) */
return NULL;
- } else if (gzgets(fp,&(Header[0]),HEADERLEN) == NULL) {
+ } else if (fgets_filecontents(&(Header[0]),HEADERLEN,&(*filecontents)) == NULL) {
/* File must terminate after > */
return NULL;
}
@@ -693,11 +1051,11 @@ input_header_fastq_gzip (bool *filterp, char **restofheader, gzFile fp, bool ski
*p = '\0';
} else {
/* Eliminate rest of header from input */
- while (gzgets(fp,&(Discard[0]),DISCARDLEN) != NULL &&
+ while (fgets_filecontents(&(Discard[0]),DISCARDLEN,&(*filecontents)) != NULL &&
rindex(&(Discard[0]),'\n') == NULL) ;
}
- if (skipp) {
+ if (skipp == true) {
return (char *) skipped_acc;
} else {
p = start = &(Header[0]);
@@ -754,20 +1112,28 @@ input_header_fastq_gzip (bool *filterp, char **restofheader, gzFile fp, bool ski
#endif
-#ifdef HAVE_BZLIB
+#ifdef HAVE_ZLIB
static char *
-input_header_fastq_bzip2 (bool *filterp, char **restofheader, Bzip2_T fp, bool skipp) {
+input_header_fastq_gzip (bool *filterp, char **restofheader, int nextchar,
+#ifdef USE_MPI
+ Filestring_T filestring,
+#endif
+ gzFile fp, bool skipp) {
char *acc, *p, *q, *start;
size_t length;
int fieldi = 0;
*filterp = false;
- if (bzeof(fp)) {
+ if (nextchar == EOF) { /* Was gzeof(fp) */
return NULL;
- } else if (bzgets(fp,&(Header[0]),HEADERLEN) == NULL) {
+ } else if ((p = gzgets(fp,&(Header[0]),HEADERLEN)) == NULL) {
/* File must terminate after > */
return NULL;
+#ifdef USE_MPI
+ } else {
+ Filestring_puts(filestring,p,strlen(p));
+#endif
}
if (Header[0] == '\n') {
@@ -779,8 +1145,15 @@ input_header_fastq_bzip2 (bool *filterp, char **restofheader, Bzip2_T fp, bool s
*p = '\0';
} else {
/* Eliminate rest of header from input */
- while (bzgets(fp,&(Discard[0]),DISCARDLEN) != NULL &&
+#ifdef USE_MPI
+ while ((p = gzgets(fp,&(Discard[0]),DISCARDLEN)) != NULL &&
+ rindex(&(Discard[0]),'\n') == NULL) {
+ Filestring_puts(filestring,p,strlen(p));
+ }
+#else
+ while (gzgets(fp,&(Discard[0]),DISCARDLEN) != NULL &&
rindex(&(Discard[0]),'\n') == NULL) ;
+#endif
}
if (skipp) {
@@ -840,31 +1213,158 @@ input_header_fastq_bzip2 (bool *filterp, char **restofheader, Bzip2_T fp, bool s
#endif
-static bool
-skip_header (FILE *fp) {
+#ifdef HAVE_BZLIB
+static char *
+input_header_fastq_bzip2 (bool *filterp, char **restofheader, int nextchar,
+#ifdef USE_MPI
+ Filestring_T filestring,
+#endif
+ Bzip2_T fp, bool skipp) {
+ char *acc, *p, *q, *start;
+ size_t length;
+ int fieldi = 0;
- if (feof(fp)) {
- return false;
- } else if (fgets(&(Header[0]),HEADERLEN,fp) == NULL) {
+ *filterp = false;
+
+ if (nextchar == EOF) { /* Was bzeof(fp) */
+ return NULL;
+ } else if ((p = bzgets(fp,&(Header[0]),HEADERLEN)) == NULL) {
/* File must terminate after > */
- return false;
+ return NULL;
+#ifdef USE_MPI
+ } else {
+ Filestring_puts(filestring,p,strlen(p));
+#endif
}
- if (rindex(&(Header[0]),'\n') == NULL) {
+ if (Header[0] == '\n') {
+ Header[0] = '\0';
+ } else if ((p = rindex(&(Header[0]),'\n')) != NULL) {
+ if (p[-1] == '\r') {
+ p--;
+ }
+ *p = '\0';
+ } else {
/* Eliminate rest of header from input */
- while (fgets(&(Discard[0]),DISCARDLEN,fp) != NULL &&
+#ifdef USE_MPI
+ while ((p = bzgets(fp,&(Discard[0]),DISCARDLEN)) != NULL &&
+ rindex(&(Discard[0]),'\n') == NULL) {
+ Filestring_puts(filestring,p,strlen(p));
+ }
+#else
+ while (bzgets(fp,&(Discard[0]),DISCARDLEN) != NULL &&
rindex(&(Discard[0]),'\n') == NULL) ;
+#endif
}
- return true;
-}
-
-#ifdef HAVE_ZLIB
-static bool
-skip_header_gzip (gzFile fp) {
-
- if (gzeof(fp)) {
- return false;
+ if (skipp) {
+ return (char *) skipped_acc;
+ } else {
+ p = start = &(Header[0]);
+ while (fieldi < acc_fieldi_start) {
+ while (*p != '\0' && !isspace((int) *p)) {
+ p++;
+ }
+ if (*p != '\0') {
+ p++;
+ }
+ start = p;
+ fieldi++;
+ }
+
+ while (fieldi < acc_fieldi_end) {
+ while (*p != '\0' && !isspace((int) *p)) {
+ p++;
+ }
+ if (*p != '\0') {
+ p++;
+ }
+ fieldi++;
+ }
+
+ while (*p != '\0' && !isspace((int) *p)) {
+ p++;
+ }
+
+ if (filter_chastity_p == true) {
+ q = p;
+ /* Expecting <read>:<is filtered>:<control number>:<index sequence>, e.g., 1:Y:0:CTTGTA */
+ while (*q != '\0' && *q != ':') {
+ q++;
+ }
+ if (*q != '\0') {
+ q++;
+ if (*q == 'Y') {
+ *filterp = true;
+ }
+ }
+ }
+
+ *p = '\0';
+
+ length = (p - start)/sizeof(char);
+ acc = (char *) CALLOC_IN(length+1,sizeof(char));
+ strcpy(acc,start);
+ *restofheader = (char *) CALLOC_IN(1,sizeof(char));
+ (*restofheader)[0] = '\0';
+
+ return acc;
+ }
+}
+#endif
+
+
+static bool
+skip_header (int *nchars, FILE *fp, int nextchar) {
+ char *p;
+
+ if (nextchar == EOF) {
+ return false;
+ } else if ((p = fgets(&(Header[0]),HEADERLEN,fp)) == NULL) {
+ /* File must terminate after > */
+ return false;
+ } else {
+ *nchars += strlen(p);
+ }
+
+ if (rindex(&(Header[0]),'\n') == NULL) {
+ /* Eliminate rest of header from input */
+ while ((p = fgets(&(Discard[0]),DISCARDLEN,fp)) != NULL &&
+ rindex(&(Discard[0]),'\n') == NULL) {
+ *nchars += strlen(p);
+ }
+ }
+
+ return true;
+}
+
+#ifdef USE_MPI
+static bool
+skip_header_filecontents (char **filecontents, int nextchar) {
+
+ if (nextchar == EOF || nextchar == '\0') {
+ return false;
+ } else if (fgets_filecontents(&(Header[0]),HEADERLEN,&(*filecontents)) == NULL) {
+ /* File must terminate after > */
+ return false;
+ }
+
+ if (rindex(&(Header[0]),'\n') == NULL) {
+ /* Eliminate rest of header from input */
+ while (fgets_filecontents(&(Discard[0]),DISCARDLEN,&(*filecontents)) != NULL &&
+ rindex(&(Discard[0]),'\n') == NULL) ;
+ }
+
+ return true;
+}
+#endif
+
+#ifdef HAVE_ZLIB
+static bool
+skip_header_gzip (gzFile fp, int nextchar) {
+
+ if (nextchar == EOF) { /* was gzeof(fp) */
+ return false;
} else if (gzgets(fp,&(Header[0]),HEADERLEN) == NULL) {
/* File must terminate after > */
return false;
@@ -882,9 +1382,9 @@ skip_header_gzip (gzFile fp) {
#ifdef HAVE_BZLIB
static bool
-skip_header_bzip2 (Bzip2_T fp) {
+skip_header_bzip2 (Bzip2_T fp, int nextchar) {
- if (bzeof(fp)) {
+ if (nextchar == EOF) { /* was bzeof(fp) */
return false;
} else if (bzgets(fp,&(Header[0]),HEADERLEN) == NULL) {
/* File must terminate after > */
@@ -968,9 +1468,9 @@ find_spaces (int *nspaces, char *line) {
}
-
static int
-input_oneline (int *nextchar, char **longstring, char *Start, FILE *fp, bool possible_fasta_header_p) {
+input_oneline (int *nextchar, int *nchars, char **longstring, char *Start,
+ FILE *fp, bool possible_fasta_header_p) {
int remainder;
char *ptr, *p = NULL;
int strlenp, nspaces;
@@ -984,15 +1484,19 @@ input_oneline (int *nextchar, char **longstring, char *Start, FILE *fp, bool pos
ptr = &(Start[0]);
remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char);
if (*nextchar == EOF || (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) {
- debug(printf("Returning 0\n"));
+ debug(printf("nchars %d: EOF or > or +: Returning 0\n",*nchars));
+ return 0;
+ } else if (*nextchar == '\n') {
+ debug(printf("nchars %d: Blank line: Returning 0\n",*nchars));
return 0;
} else {
*ptr++ = (char) *nextchar;
if ((p = fgets(ptr,remainder+1,fp)) == NULL) {
/* NULL if file ends with a blank line */
- printf("Blank line. read %s.\n",ptr);
+ debug(printf("Blank line. read %s.\n",ptr));
} else {
- debug(printf("Read %s.\n",ptr));
+ *nchars += strlen(p);
+ debug(printf("nchars %d: Read %s.\n",*nchars,ptr));
#if 0
if (pc_linefeeds_p == true) {
#endif
@@ -1016,27 +1520,35 @@ input_oneline (int *nextchar, char **longstring, char *Start, FILE *fp, bool pos
p--;
}
*p = '\0';
- debug(printf("Now string is %s.\n",ptr));
- } else if (feof(fp)) {
+ debug(printf("nchars %d: Now string is %s.\n",*nchars,ptr));
+ } else if (*ptr == EOF) {
/* No line feed, but end of file. Handle below. */
- debug(printf("End of file seen\n"));
+ debug(printf("nchars %d: End of file seen\n",*nchars));
} else {
/* No line feed, but not end of file. Read too long, so using another method. */
+ debug(printf("No line feed, but not end of file. Using Intlist_T.\n"));
intlist = (Intlist_T) NULL;
- for (i = 0; i <= MAX_READLENGTH; i++) {
+ i = 0;
+ while (i <= MAX_READLENGTH && Start[i] != '\0') {
+ debug(printf("Pushing %c\n",Start[i]));
intlist = Intlist_push_in(intlist,Start[i]);
+ i++;
}
while ((*nextchar = fgetc(fp)) != EOF && *nextchar != '\n') {
+ *nchars += 1;
intlist = Intlist_push_in(intlist,*nextchar);
}
+ *nchars += 1;
if (*nextchar == '\n') {
*nextchar = fgetc(fp);
+ *nchars += 1;
}
intlist = Intlist_reverse(intlist);
*longstring = Intlist_to_char_array(&i,intlist);
Intlist_free_in(&intlist);
+ debug(printf("nchars %d: Intlist method returning %d\n",*nchars,i));
return i;
}
}
@@ -1044,23 +1556,116 @@ input_oneline (int *nextchar, char **longstring, char *Start, FILE *fp, bool pos
ptr += strlen(ptr);
/* Peek at character after eoln */
- if (feof(fp)) {
- *nextchar = EOF;
+ *nextchar = fgetc(fp);
+ *nchars += 1;
+
+ debug(printf("nchars %d: Returning %ld with nextchar %c\n",*nchars,(ptr - &(Start[0]))/sizeof(char),*nextchar));
+ return (ptr - &(Start[0]))/sizeof(char);
+ }
+}
+
+
+#ifdef USE_MPI
+static int
+input_oneline_filecontents (int *nextchar, char **longstring, char *Start,
+ char **filecontents, bool possible_fasta_header_p) {
+ int remainder;
+ char *ptr, *p = NULL;
+ int strlenp, nspaces;
+
+ int i;
+ Intlist_T intlist;
+
+ debug(printf("Entering input_oneline with nextchar = %c\n",*nextchar));
+ *longstring = (char *) NULL;
+
+ ptr = &(Start[0]);
+ remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char);
+ if (*nextchar == EOF || *nextchar == '\0' ||
+ (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) {
+ debug(printf("EOF or > or +: Returning 0\n"));
+ return 0;
+ } else if (*nextchar == '\n') {
+ debug(printf("Blank line: Returning 0\n"));
+ return 0;
+ } else {
+ *ptr++ = (char) *nextchar;
+ if ((p = fgets_filecontents(ptr,remainder+1,&(*filecontents))) == NULL) {
+ /* NULL if file ends with a blank line */
+ debug(printf("Blank line. read %s.\n",ptr));
} else {
- while ((*nextchar = fgetc(fp)) != EOF && (*nextchar == '\r' || *nextchar == '\n' || isspace(*nextchar))) {
+ debug(printf("Read %s.\n",ptr));
+#if 0
+ if (pc_linefeeds_p == true) {
+#endif
+ while ((p = find_spaces(&nspaces,ptr)) != NULL) {
+ ptr = p;
+ p += nspaces;
+ strlenp = strlen(p);
+ memmove(ptr,p,strlenp);
+ ptr[strlenp] = '\0';
+ debug(printf("Found %d spaces. Did memmove of %d chars at %p to %p\n",nspaces,strlenp,p,ptr));
+ }
+#if 0
+ }
+#endif
+
+ if (*ptr == '\n') {
+ *ptr = '\0';
+ debug(printf("Now string is %s.\n",ptr));
+ } else if ((p = index(ptr,'\n')) != NULL) {
+ if (p[-1] == '\r') {
+ p--;
+ }
+ *p = '\0';
+ debug(printf("Now string is %s.\n",ptr));
+ } else if (*ptr == '\0') {
+ /* No line feed, but end of file. Handle below. */
+ debug(printf("End of file seen\n"));
+ } else {
+ /* No line feed, but not end of file. Read too long, so using another method. */
+ debug(printf("No line feed, but not end of file. Using Intlist_T.\n"));
+ intlist = (Intlist_T) NULL;
+ i = 0;
+ while (i <= MAX_READLENGTH && Start[i] != '\0') {
+ debug(printf("Pushing %c\n",Start[i]));
+ intlist = Intlist_push_in(intlist,Start[i]);
+ i++;
+ }
+ while ((*nextchar = *(*filecontents)++) != '\0' && *nextchar != '\n') {
+ intlist = Intlist_push_in(intlist,*nextchar);
+ }
+ if (*nextchar == '\n') {
+ *nextchar = *(*filecontents)++;
+ }
+
+ intlist = Intlist_reverse(intlist);
+ *longstring = Intlist_to_char_array(&i,intlist);
+ Intlist_free_in(&intlist);
+
+ debug(printf("Intlist method returning %d\n",i));
+ return i;
}
}
- debug(printf("Returning %ld\n",(ptr - &(Start[0]))/sizeof(char)));
+ ptr += strlen(ptr);
+
+ /* Peek at character after eoln */
+ *nextchar = *(*filecontents)++;
+
+ debug(printf("Returning %ld with nextchar %c\n",(ptr - &(Start[0]))/sizeof(char),*nextchar));
return (ptr - &(Start[0]))/sizeof(char);
}
}
-
-
+#endif
#ifdef HAVE_ZLIB
static int
-input_oneline_gzip (int *nextchar, char **longstring, char *Start, gzFile fp, bool possible_fasta_header_p) {
+input_oneline_gzip (int *nextchar, char **longstring, char *Start,
+#ifdef USE_MPI
+ Filestring_T filestring,
+#endif
+ gzFile fp, bool possible_fasta_header_p) {
int remainder;
char *ptr, *p = NULL;
int strlenp, nspaces;
@@ -1074,14 +1679,20 @@ input_oneline_gzip (int *nextchar, char **longstring, char *Start, gzFile fp, bo
ptr = &(Start[0]);
remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char);
if (*nextchar == EOF || (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) {
- debug(printf("Returning 0\n"));
+ debug(printf("EOF or > or +: Returning 0\n"));
+ return 0;
+ } else if (*nextchar == '\n') {
+ debug(printf("Blank line: Returning 0\n"));
return 0;
} else {
*ptr++ = (char) *nextchar;
if ((p = gzgets(fp,ptr,remainder+1)) == NULL) {
/* NULL if file ends with a blank line */
- printf("Blank line. read %s.\n",ptr);
+ debug(printf("Blank line. read %s.\n",ptr));
} else {
+#ifdef USE_MPI
+ Filestring_puts(filestring,p,strlen(p));
+#endif
debug(printf("Read %s.\n",ptr));
#if 0
if (pc_linefeeds_p == true) {
@@ -1107,40 +1718,52 @@ input_oneline_gzip (int *nextchar, char **longstring, char *Start, gzFile fp, bo
}
*p = '\0';
debug(printf("Now string is %s.\n",ptr));
- } else if (gzeof(fp)) {
+ } else if (*ptr == EOF) {
/* No line feed, but end of file. Handle below. */
debug(printf("End of file seen\n"));
} else {
/* No line feed, but not end of file. Read too long, so using another method. */
+ debug(printf("No line feed, but not end of file. Using Intlist_T.\n"));
intlist = (Intlist_T) NULL;
- for (i = 0; i <= MAX_READLENGTH; i++) {
+ i = 0;
+ while (i <= MAX_READLENGTH && Start[i] != '\0') {
+ debug(printf("Pushing %c\n",Start[i]));
intlist = Intlist_push_in(intlist,Start[i]);
+ i++;
}
while ((*nextchar = gzgetc(fp)) != EOF && *nextchar != '\n') {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring);
+#endif
intlist = Intlist_push_in(intlist,*nextchar);
}
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring);
+#endif
if (*nextchar == '\n') {
*nextchar = gzgetc(fp);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring);
+#endif
}
intlist = Intlist_reverse(intlist);
*longstring = Intlist_to_char_array(&i,intlist);
Intlist_free_in(&intlist);
+ debug(printf("Intlist method returning %d\n",i));
return i;
}
}
ptr += strlen(ptr);
/* Peek at character after eoln */
- if (gzeof(fp)) {
- *nextchar = EOF;
- } else {
- while ((*nextchar = gzgetc(fp)) != EOF && (*nextchar == '\r' || *nextchar == '\n' || isspace(*nextchar))) {
- }
- }
+ *nextchar = gzgetc(fp);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring);
+#endif
- debug(printf("Returning %ld\n",(ptr - &(Start[0]))/sizeof(char)));
+ debug(printf("Returning %ld with nextchar %c\n",(ptr - &(Start[0]))/sizeof(char),*nextchar));
return (ptr - &(Start[0]))/sizeof(char);
}
}
@@ -1148,7 +1771,11 @@ input_oneline_gzip (int *nextchar, char **longstring, char *Start, gzFile fp, bo
#ifdef HAVE_BZLIB
static int
-input_oneline_bzip2 (int *nextchar, char **longstring, char *Start, Bzip2_T fp, bool possible_fasta_header_p) {
+input_oneline_bzip2 (int *nextchar, char **longstring, char *Start,
+#ifdef USE_MPI
+ Filestring_T filestring,
+#endif
+ Bzip2_T fp, bool possible_fasta_header_p) {
int remainder;
char *ptr, *p = NULL;
int strlenp, nspaces;
@@ -1162,14 +1789,20 @@ input_oneline_bzip2 (int *nextchar, char **longstring, char *Start, Bzip2_T fp,
ptr = &(Start[0]);
remainder = (&(Start[MAX_READLENGTH]) - ptr)/sizeof(char);
if (*nextchar == EOF || (possible_fasta_header_p == true && (*nextchar == '>' || *nextchar == '+'))) {
- debug(printf("Returning 0\n"));
+ debug(printf("EOF or > or +: Returning 0\n"));
+ return 0;
+ } else if (*nextchar == '\n') {
+ debug(printf("Blank line: Returning 0\n"));
return 0;
} else {
*ptr++ = (char) *nextchar;
if ((p = bzgets(fp,ptr,remainder+1)) == NULL) {
/* NULL if file ends with a blank line */
- printf("Blank line. read %s.\n",ptr);
+ debug(printf("Blank line. read %s.\n",ptr));
} else {
+#ifdef USE_MPI
+ Filestring_puts(filestring,p,strlen(p));
+#endif
debug(printf("Read %s.\n",ptr));
#if 0
if (pc_linefeeds_p == true) {
@@ -1180,7 +1813,7 @@ input_oneline_bzip2 (int *nextchar, char **longstring, char *Start, Bzip2_T fp,
strlenp = strlen(p);
memmove(ptr,p,strlenp);
ptr[strlenp] = '\0';
- debug(printf("Found %d spaces. Did memmove of %d chars at %p to %p to yield\n",nspaces,strlenp,p,ptr));
+ debug(printf("Found %d spaces. Did memmove of %d chars at %p to %p\n",nspaces,strlenp,p,ptr));
}
#if 0
}
@@ -1195,40 +1828,52 @@ input_oneline_bzip2 (int *nextchar, char **longstring, char *Start, Bzip2_T fp,
}
*p = '\0';
debug(printf("Now string is %s.\n",ptr));
- } else if (bzeof(fp)) {
+ } else if (*ptr == EOF) {
/* No line feed, but end of file. Handle below. */
debug(printf("End of file seen\n"));
} else {
/* No line feed, but not end of file. Read too long, so using another method. */
+ debug(printf("No line feed, but not end of file. Using Intlist_T.\n"));
intlist = (Intlist_T) NULL;
- for (i = 0; i <= MAX_READLENGTH; i++) {
+ i = 0;
+ while (i <= MAX_READLENGTH && Start[i] != '\0') {
+ debug(printf("Pushing %c\n",Start[i]));
intlist = Intlist_push_in(intlist,Start[i]);
+ i++;
}
while ((*nextchar = bzgetc(fp)) != EOF && *nextchar != '\n') {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring);
+#endif
intlist = Intlist_push_in(intlist,*nextchar);
}
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring);
+#endif
if (*nextchar == '\n') {
*nextchar = bzgetc(fp);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring);
+#endif
}
intlist = Intlist_reverse(intlist);
*longstring = Intlist_to_char_array(&i,intlist);
Intlist_free_in(&intlist);
+ debug(printf("Intlist method returning %d\n",i));
return i;
}
}
ptr += strlen(ptr);
/* Peek at character after eoln */
- if (bzeof(fp)) {
- *nextchar = EOF;
- } else {
- while ((*nextchar = bzgetc(fp)) != EOF && (*nextchar == '\r' || *nextchar == '\n' || isspace(*nextchar))) {
- }
- }
+ *nextchar = bzgetc(fp);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring);
+#endif
- debug(printf("Returning %ld\n",(ptr - &(Start[0]))/sizeof(char)));
+ debug(printf("Returning %ld with nextchar %c\n",(ptr - &(Start[0]))/sizeof(char),*nextchar));
return (ptr - &(Start[0]))/sizeof(char);
}
}
@@ -2116,24 +2761,26 @@ Shortread_new (char *acc, char *restofheader, bool filterp,
T
-Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FILE **input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p) {
+Shortread_read_fasta_text (int *nextchar, int *nchars1, int *nchars2, T *queryseq2,
+ FILE **input1, FILE **input2,
+ char ***files, int *nfiles, bool skipp) {
T queryseq1;
+ int nextchar2;
char *acc, *restofheader, *acc2, *restofheader2;
char *long_read_1, *long_read_2, *long_quality;
- int nextchar2 = '\0';
int fulllength1, fulllength2, quality_length;
bool filterp;
while (1) {
queryseq1 = *queryseq2 = (T) NULL;
- if (*input1 == NULL || feof(*input1)) {
+ if (*input1 == NULL || *nextchar == EOF) { /* was feof(*input1) */
if (*input1 != NULL) {
+ debugf(fprintf(stderr,"Master closing input file 1 using fclose\n"));
fclose(*input1);
*input1 = NULL;
}
if (*input2 != NULL) {
+ debugf(fprintf(stderr,"Master closing input file 2 using fclose\n"));
fclose(*input2);
*input2 = NULL;
}
@@ -2150,10 +2797,11 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
*nextchar = EOF;
return (T) NULL;
} else {
+ debugf(fprintf(stderr,"Master opening input file 1\n"));
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- *nextchar = '\0';
+ nextchar2 = '\0';
}
} else {
@@ -2174,37 +2822,43 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
}
if (*nextchar == '\0') {
- if ((*nextchar = Shortread_input_init(*input1)) == EOF) {
- *nextchar = EOF;
+ if ((*nextchar = Shortread_input_init(&(*nchars1),*input1)) == EOF) {
return (T) NULL;
}
}
debug(printf("** Getting header\n"));
- if ((acc = input_header(&filterp,&restofheader,*input1,skipp)) == NULL) {
+ if ((acc = input_header(&(*nchars1),&filterp,&restofheader,*nextchar,*input1,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
*nextchar = EOF;
} else if ((*nextchar = fgetc(*input1)) == '\r' || *nextchar == '\n') {
/* Process blank lines and loop again */
while (*nextchar != EOF && ((*nextchar = fgetc(*input1)) != '>')) {
+ *nchars1 += 1;
+ }
+ if (*nextchar != EOF) {
+ *nchars1 += 1;
}
- } else if ((fulllength1 = input_oneline(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
+ } else if ((fulllength1 = input_oneline(&(*nextchar),&(*nchars1),&long_read_1,&(Read1[0]),*input1,
/*possible_fasta_header_p*/true)) == 0) {
+ *nchars1 += 1; /* For first "else if" clause */
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
/* *nextchar = EOF; */
} else {
+ *nchars1 += 1; /* For first "else if" clause */
/* queryseq1 is in Read1 */
/* See what is in next line */
- if ((fulllength2 = input_oneline(&(*nextchar),&long_read_2,&(Read2[0]),*input1,
+ if ((fulllength2 = input_oneline(&(*nextchar),&(*nchars1),&long_read_2,&(Read2[0]),*input1,
/*possible_fasta_header_p*/true)) > 0) {
/* Paired-end, single file. queryseq1 is in Read1 and queryseq2 is in Read2 */
if (*nextchar == '+') {
/* Paired-end with quality strings */
- skip_header(*input1);
+ skip_header(&(*nchars1),*input1,*nextchar);
*nextchar = fgetc(*input1);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
+ *nchars1 += 1;
+ quality_length = input_oneline(&(*nextchar),&(*nchars1),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength1) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2215,7 +2869,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
Quality,long_quality,quality_length,barcode_length,
invert_first_p,/*copy_acc_p*/false,skipp);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
+ quality_length = input_oneline(&(*nextchar),&(*nchars1),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength2) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2238,6 +2892,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
} else {
if (*input2 == NULL && *nfiles > 0 && force_single_end_p == false &&
(*input2 = FOPEN_READ_TEXT((*files)[0])) != NULL) {
+ debugf(fprintf(stderr,"Master opening input file 1\n"));
(*files) += 1;
(*nfiles) -= 1;
nextchar2 = '\0';
@@ -2245,7 +2900,7 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
if (*input2 != NULL) {
/* Paired-end in two files */
- if ((acc2 = input_header(&filterp,&restofheader2,*input2,skipp)) == NULL) {
+ if ((acc2 = input_header(&(*nchars2),&filterp,&restofheader2,nextchar2,*input2,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
(*queryseq2) = (T) NULL;
@@ -2253,20 +2908,27 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
} else if ((nextchar2 = fgetc(*input2)) == '\r' || nextchar2 == '\n') {
/* Process blank lines and loop again */
while (nextchar2 != EOF && ((nextchar2 = fgetc(*input2)) != '>')) {
+ *nchars2 += 1;
+ }
+ if (nextchar2 != EOF) {
+ *nchars2 += 1;
}
(*queryseq2) = (T) NULL;
- } else if ((fulllength2 = input_oneline(&nextchar2,&long_read_2,&(Read2[0]),*input2,
+ } else if ((fulllength2 = input_oneline(&nextchar2,&(*nchars2),&long_read_2,&(Read2[0]),*input2,
/*possible_fasta_header_p*/true)) == 0) {
+ *nchars2 += 1; /* For first "else if" clause */
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
- /* nextchar2= EOF; */
+ /* nextchar2 = EOF; */
(*queryseq2) = (T) NULL;
} else {
+ *nchars2 += 1; /* For first "else if" clause */
if (*nextchar == '+') {
/* End 1 with a quality string */
- skip_header(*input1);
+ skip_header(&(*nchars1),*input1,*nextchar);
*nextchar = fgetc(*input1);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
+ *nchars1 += 1;
+ quality_length = input_oneline(&(*nextchar),&(*nchars1),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength1) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2286,9 +2948,10 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
if (nextchar2 == '+') {
/* End 2 with a quality string */
- skip_header(*input2);
+ skip_header(&(*nchars2),*input2,nextchar2);
nextchar2 = fgetc(*input2);
- quality_length = input_oneline(&nextchar2,&long_quality,&(Quality[0]),*input2,
+ *nchars2 += 1;
+ quality_length = input_oneline(&nextchar2,&(*nchars2),&long_quality,&(Quality[0]),*input2,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength2) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2316,9 +2979,10 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
/* Single-end: Either EOF, '>', or '+' */
if (*nextchar == '+') {
/* Single-end with a quality string */
- skip_header(*input1);
+ skip_header(&(*nchars1),*input1,*nextchar);
*nextchar = fgetc(*input1);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
+ *nchars1 += 1;
+ quality_length = input_oneline(&(*nextchar),&(*nchars1),&long_quality,&(Quality[0]),*input1,
/*possible_fasta_header_p*/false);
if (quality_length != fulllength1) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
@@ -2339,11 +3003,10 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
}
}
- debug(printf("Returning queryseq with contents %s\n",queryseq1->contents));
-
if (queryseq1 == (T) SKIPPED) {
return (T) SKIPPED;
} else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ debug(printf("nchars %d: Returning queryseq with contents %s\n",*nchars1,queryseq1->contents));
return queryseq1;
}
}
@@ -2351,107 +3014,186 @@ Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
}
-
-
-#ifdef HAVE_ZLIB
-T
-Shortread_read_fasta_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input1, gzFile*input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p) {
+#ifdef USE_MPI
+static T
+read_fasta_filecontents (int *nextchar, T *queryseq2,
+ char **filecontents1, char **filecontents2,
+#ifdef USE_MPI_FILE_INPUT
+ MPI_File *input1, MPI_File *input2, MPI_Comm workers_comm,
+#else
+ FILE **input1, FILE **input2,
+#endif
+ char ***files, int *nfiles, bool skipp) {
T queryseq1;
+ int nextchar2;
char *acc, *restofheader, *acc2, *restofheader2;
- char *long_read_1, *long_read_2;
- int nextchar2 = '\0';
- int fulllength1, fulllength2;
+ char *long_read_1, *long_read_2, *long_quality;
+ int fulllength1, fulllength2, quality_length;
bool filterp;
while (1) {
queryseq1 = *queryseq2 = (T) NULL;
- if (*input1 == NULL || gzeof(*input1)) {
+
+ if (*nextchar == EOF || *nextchar == '\0') {
if (*input1 != NULL) {
- gzclose(*input1);
+#ifdef USE_MPI_FILE_INPUT
+ debugf(fprintf(stderr,"Slave closing input 1 using MPI_File_close\n"));
+ MPI_File_close(&(*input1));
+#else
+ debugf(fprintf(stderr,"Slave closing input 1 using fclose\n"));
+ fclose(*input1);
+#endif
*input1 = NULL;
}
if (*input2 != NULL) {
- gzclose(*input2);
+#ifdef USE_MPI_FILE_INPUT
+ debugf(fprintf(stderr,"Slave closing input 2 using MPI_File_close\n"));
+ MPI_File_close(&(*input2));
+#else
+ debugf(fprintf(stderr,"Slave closing input 2 using fclose\n"));
+ fclose(*input2);
+#endif
*input2 = NULL;
}
if (*nfiles == 0) {
+#ifdef USE_MPI_FILE_INPUT
+ *nextchar = '\0';
+#else
*nextchar = EOF;
+#endif
return (T) NULL;
} else if (*nfiles == 1 || force_single_end_p == true) {
- if ((*input1 = gzopen((*files)[0],"rb")) == NULL) {
+#ifdef USE_MPI_FILE_INPUT
+ if ((*input1 = MPI_fopen((*files)[0],workers_comm)) == NULL) {
+ fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
+ (*files) += 1;
+ (*nfiles) -= 1;
+ *nextchar = '\0';
+ return (T) NULL;
+ } else {
+ debugf(fprintf(stderr,"Slave opening input file 1\n"));
+ *input2 = NULL;
+ (*files) += 1;
+ (*nfiles) -= 1;
+ nextchar2 = '\0';
+ }
+#else
+ if ((*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
(*files) += 1;
(*nfiles) -= 1;
*nextchar = EOF;
return (T) NULL;
} else {
+ debugf(fprintf(stderr,"Slave opening input file 2\n"));
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- *nextchar = '\0';
+ nextchar2 = '\0';
}
+#endif
} else {
- while (*nfiles > 0 && (*input1 = gzopen((*files)[0],"rb")) == NULL) {
+#ifdef USE_MPI_FILE_INPUT
+ while (*nfiles > 0 && (*input1 = MPI_fopen((*files)[0],workers_comm)) == NULL) {
fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
- (*files)++;
- (*nfiles)--;
+ (*files) += 1;
+ (*nfiles) -= 1;
}
if (*input1 == NULL) {
- *nextchar = EOF;
+ *nextchar = '\0';
return (T) NULL;
} else {
-#ifdef HAVE_ZLIB_GZBUFFER
- gzbuffer(*input1,GZBUFFER_SIZE);
-#endif
- (*files)++;
- (*nfiles)--;
+ debugf(fprintf(stderr,"Slave opening input file 1\n"));
+ (*files) += 1;
+ (*nfiles) -= 1;
*nextchar = '\0';
}
- }
- }
+#else
+ while (*nfiles > 0 && (*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
+ fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
+ (*files) += 1;
+ (*nfiles) -= 1;
+ }
+ if (*input1 == NULL) {
+ *nextchar = EOF;
+ return (T) NULL;
+ } else {
+ debugf(fprintf(stderr,"Slave opening input file 1\n"));
+ (*files) += 1;
+ (*nfiles) -= 1;
+ *nextchar = '\0';
+ }
+#endif
+ }
+ }
if (*nextchar == '\0') {
- if ((*nextchar = Shortread_input_init_gzip(*input1)) == EOF) {
- *nextchar = EOF;
+ if ((*nextchar = Shortread_input_init_filecontents(&(*filecontents1))) == '\0') {
return (T) NULL;
}
}
debug(printf("** Getting header\n"));
- if ((acc = input_header_gzip(&filterp,&restofheader,*input1,skipp)) == NULL) {
+ if ((acc = input_header_filecontents(&filterp,&restofheader,*nextchar,&(*filecontents1),skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
- *nextchar = EOF;
- } else if ((*nextchar = gzgetc(*input1)) == '\r' || *nextchar == '\n') {
+ *nextchar = '\0';
+ } else if ((*nextchar = *(*filecontents1)++) == '\r' || *nextchar == '\n') {
/* Process blank lines and loop again */
- while (*nextchar != EOF && ((*nextchar = gzgetc(*input1)) != '>')) {
- }
- } else if ((fulllength1 = input_oneline_gzip(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
- /*possible_fasta_header_p*/true)) == 0) {
+ while (*nextchar != '\0' && ((*nextchar = *(*filecontents1)++) != '>')) ;
+ } else if ((fulllength1 = input_oneline_filecontents(&(*nextchar),&long_read_1,&(Read1[0]),&(*filecontents1),
+ /*possible_fasta_header_p*/true)) == 0) {
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
- /* *nextchar = EOF; */
+ /* *nextchar = '\0'; */
} else {
/* queryseq1 is in Read1 */
/* See what is in next line */
- if ((fulllength2 = input_oneline_gzip(&(*nextchar),&long_read_2,&(Read2[0]),*input1,
- /*possible_fasta_header_p*/true)) > 0) {
+ if ((fulllength2 = input_oneline_filecontents(&(*nextchar),&long_read_2,&(Read2[0]),&(*filecontents1),
+ /*possible_fasta_header_p*/true)) > 0) {
/* Paired-end, single file. queryseq1 is in Read1 and queryseq2 is in Read2 */
- queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_first_p,/*copy_acc_p*/false,skipp);
- (*queryseq2) = Shortread_new(/*acc*/NULL,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength2,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_second_p,/*copy_acc_p*/false,skipp);
+ if (*nextchar == '+') {
+ /* Paired-end with quality strings */
+ skip_header_filecontents(&(*filecontents1),*nextchar);
+ *nextchar = *(*filecontents1)++;
+ quality_length = input_oneline_filecontents(&(*nextchar),&long_quality,&(Quality[0]),&(*filecontents1),
+ /*possible_fasta_header_p*/false);
+ if (quality_length != fulllength1) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength1,acc);
+ abort();
+ }
+
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ quality_length = input_oneline_filecontents(&(*nextchar),&long_quality,&(Quality[0]),&(*filecontents1),
+ /*possible_fasta_header_p*/false);
+ if (quality_length != fulllength2) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength2,acc);
+ abort();
+ }
+
+ (*queryseq2) = Shortread_new(/*acc*/NULL,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength2,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ } else {
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ (*queryseq2) = Shortread_new(/*acc*/NULL,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength2,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ }
} else {
- if (*input2 == NULL && *nfiles > 0 && force_single_end_p == false &&
+ if (*filecontents2 == NULL && *nfiles > 0 && force_single_end_p == false &&
(*input2 = gzopen((*files)[0],"rb")) != NULL) {
+ debugf(fprintf(stderr,"Slave opening input file 2\n"));
#ifdef HAVE_ZLIB_GZBUFFER
gzbuffer(*input2,GZBUFFER_SIZE);
#endif
@@ -2460,79 +3202,137 @@ Shortread_read_fasta_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
nextchar2 = '\0';
}
- if (*input2 != NULL) {
+ if (*filecontents2 != NULL) {
/* Paired-end in two files */
- if ((acc2 = input_header_gzip(&filterp,&restofheader2,*input2,skipp)) == NULL) {
+ if ((acc2 = input_header_filecontents(&filterp,&restofheader2,nextchar2,&(*filecontents2),skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
(*queryseq2) = (T) NULL;
- nextchar2 = EOF;
- } else if ((nextchar2 = gzgetc(*input2)) == '\r' || nextchar2 == '\n') {
+ nextchar2 = '\0';
+ } else if ((nextchar2 = *(*filecontents2)++) == '\r' || nextchar2 == '\n') {
/* Process blank lines and loop again */
- while (nextchar2 != EOF && ((nextchar2 = gzgetc(*input2)) != '>')) {
- }
+ while (nextchar2 != '\0' && ((nextchar2 = *(*filecontents2)++) != '>')) ;
(*queryseq2) = (T) NULL;
- } else if ((fulllength2 = input_oneline_gzip(&nextchar2,&long_read_2,&(Read2[0]),*input2,
- /*possible_fasta_header_p*/true)) == 0) {
+ } else if ((fulllength2 = input_oneline_filecontents(&nextchar2,&long_read_2,&(Read2[0]),&(*filecontents2),
+ /*possible_fasta_header_p*/true)) == 0) {
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
- /* *nextchar = EOF; */
+ /* nextchar2 = '\0'; */
(*queryseq2) = (T) NULL;
} else {
- queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_first_p,/*copy_acc_p*/false,skipp);
- (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_second_p,/*copy_acc_p*/false,skipp);
- FREE_IN(acc2);
- FREE_IN(restofheader2);
+ if (*nextchar == '+') {
+ /* End 1 with a quality string */
+ skip_header_filecontents(&(*filecontents1),*nextchar);
+ *nextchar = *(*filecontents1)++;
+ quality_length = input_oneline_filecontents(&(*nextchar),&long_quality,&(Quality[0]),&(*filecontents1),
+ /*possible_fasta_header_p*/false);
+ if (quality_length != fulllength1) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength1,acc);
+ abort();
+ } else {
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+ } else {
+ /* End 1 without quality string */
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+
+ if (nextchar2 == '+') {
+ /* End 2 with a quality string */
+ skip_header_filecontents(&(*filecontents2),nextchar2);
+ nextchar2 = *(*filecontents2)++;
+ quality_length = input_oneline_filecontents(&nextchar2,&long_quality,&(Quality[0]),&(*filecontents2),
+ /*possible_fasta_header_p*/false);
+ if (quality_length != fulllength2) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength2,acc2);
+ abort();
+ } else {
+ /* For FASTA, drop second accession */
+ (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ FREE_IN(acc2);
+ FREE_IN(restofheader2);
+ }
+ } else {
+ /* End 2 without quality string */
+ (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ FREE_IN(acc2);
+ FREE_IN(restofheader2);
+ }
}
} else {
/* Single-end: Either EOF, '>', or '+' */
- queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_first_p,/*copy_acc_p*/false,skipp);
+ if (*nextchar == '+') {
+ /* Single-end with a quality string */
+ skip_header_filecontents(&(*filecontents1),*nextchar);
+ *nextchar = *(*filecontents1)++;
+ quality_length = input_oneline_filecontents(&(*nextchar),&long_quality,&(Quality[0]),&(*filecontents1),
+ /*possible_fasta_header_p*/false);
+ if (quality_length != fulllength1) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength1,acc);
+ abort();
+ } else {
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+ } else {
+ /* Single-end without quality string */
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
(*queryseq2) = (T) NULL;
}
}
- debug(printf("Returning queryseq with contents %s\n",queryseq1->contents));
-
if (queryseq1 == (T) SKIPPED) {
return (T) SKIPPED;
} else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ debug(printf("Returning queryseq with contents %s\n",queryseq1->contents));
return queryseq1;
}
}
-
}
}
#endif
-#ifdef HAVE_BZLIB
+#ifdef HAVE_ZLIB
T
-Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *input1, Bzip2_T *input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p) {
+Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
+#ifdef USE_MPI
+ Filestring_T filestring1, Filestring_T filestring2,
+#endif
+ gzFile *input1, gzFile *input2,
+ char ***files, int *nfiles, bool skipp) {
T queryseq1;
+ int nextchar2;
char *acc, *restofheader, *acc2, *restofheader2;
- char *long_read_1, *long_read_2;
- int nextchar2 = '\0';
- int fulllength1, fulllength2;
+ char *long_read_1, *long_read_2, *long_quality;
+ int fulllength1, fulllength2, quality_length;
bool filterp;
while (1) {
queryseq1 = *queryseq2 = (T) NULL;
- if (*input1 == NULL || bzeof(*input1)) {
+ if (*input1 == NULL || *nextchar == EOF) { /* was gzeof(*input1) */
if (*input1 != NULL) {
- Bzip2_free(&(*input1));
+ gzclose(*input1);
*input1 = NULL;
}
if (*input2 != NULL) {
- Bzip2_free(&(*input2));
+ gzclose(*input2);
*input2 = NULL;
}
@@ -2541,21 +3341,24 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
return (T) NULL;
} else if (*nfiles == 1 || force_single_end_p == true) {
- if ((*input1 = Bzip2_new((*files)[0])) == NULL) {
+ if ((*input1 = gzopen((*files)[0],"rb")) == NULL) {
fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
(*files) += 1;
(*nfiles) -= 1;
*nextchar = EOF;
return (T) NULL;
} else {
+#ifdef HAVE_ZLIB_GZBUFFER
+ gzbuffer(*input1,GZBUFFER_SIZE);
+#endif
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- *nextchar = '\0';
+ nextchar2 = '\0';
}
} else {
- while (*nfiles > 0 && (*input1 = Bzip2_new((*files)[0])) == NULL) {
+ while (*nfiles > 0 && (*input1 = gzopen((*files)[0],"rb")) == NULL) {
fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
(*files)++;
(*nfiles)--;
@@ -2564,6 +3367,9 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
*nextchar = EOF;
return (T) NULL;
} else {
+#ifdef HAVE_ZLIB_GZBUFFER
+ gzbuffer(*input1,GZBUFFER_SIZE);
+#endif
(*files)++;
(*nfiles)--;
*nextchar = '\0';
@@ -2572,41 +3378,105 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
}
if (*nextchar == '\0') {
- if ((*nextchar = Shortread_input_init_bzip2(*input1)) == EOF) {
- *nextchar = EOF;
+ if ((*nextchar = Shortread_input_init_gzip(*input1)) == EOF) {
return (T) NULL;
}
}
debug(printf("** Getting header\n"));
- if ((acc = input_header_bzip2(&filterp,&restofheader,*input1,skipp)) == NULL) {
+ if ((acc = input_header_gzip(&filterp,&restofheader,*nextchar,
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
*nextchar = EOF;
- } else if ((*nextchar = bzgetc(*input1)) == '\r' || *nextchar == '\n') {
+ } else if ((*nextchar = gzgetc(*input1)) == '\r' || *nextchar == '\n') {
/* Process blank lines and loop again */
- while (*nextchar != EOF && ((*nextchar = bzgetc(*input1)) != '>')) {
+ while (*nextchar != EOF && ((*nextchar = gzgetc(*input1)) != '>')) {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ }
+#ifdef USE_MPI
+ if (*nextchar != EOF) {
+ Filestring_putc(*nextchar,filestring1);
}
- } else if ((fulllength1 = input_oneline_bzip2(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
- /*possible_fasta_header_p*/true)) == 0) {
+#endif
+ } else if ((fulllength1 = input_oneline_gzip(&(*nextchar),&long_read_1,&(Read1[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/true)) == 0) {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1); /* For first "else if" clause */
+#endif
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
/* *nextchar = EOF; */
} else {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1); /* For first "else if" clause */
+#endif
/* queryseq1 is in Read1 */
/* See what is in next line */
- if ((fulllength2 = input_oneline_bzip2(&(*nextchar),&long_read_2,&(Read2[0]),*input1,
- /*possible_fasta_header_p*/true)) > 0) {
+ if ((fulllength2 = input_oneline_gzip(&(*nextchar),&long_read_2,&(Read2[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/true)) > 0) {
/* Paired-end, single file. queryseq1 is in Read1 and queryseq2 is in Read2 */
- queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_first_p,/*copy_acc_p*/false,skipp);
- (*queryseq2) = Shortread_new(/*acc*/NULL,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength2,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_second_p,/*copy_acc_p*/false,skipp);
+ if (*nextchar == '+') {
+ /* Paired-end with quality strings */
+ skip_header_gzip(*input1,*nextchar);
+ *nextchar = gzgetc(*input1);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ quality_length = input_oneline_gzip(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength1) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength1,acc);
+ abort();
+ }
+
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ quality_length = input_oneline_gzip(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength2) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength2,acc);
+ abort();
+ }
+
+ (*queryseq2) = Shortread_new(/*acc*/NULL,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength2,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ } else {
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ (*queryseq2) = Shortread_new(/*acc*/NULL,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength2,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ }
+
} else {
if (*input2 == NULL && *nfiles > 0 && force_single_end_p == false &&
- (*input2 = Bzip2_new((*files)[0])) != NULL) {
+ (*input2 = gzopen((*files)[0],"rb")) != NULL) {
+#ifdef HAVE_ZLIB_GZBUFFER
+ gzbuffer(*input2,GZBUFFER_SIZE);
+#endif
(*files) += 1;
(*nfiles) -= 1;
nextchar2 = '\0';
@@ -2614,84 +3484,703 @@ Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
if (*input2 != NULL) {
/* Paired-end in two files */
- if ((acc2 = input_header_bzip2(&filterp,&restofheader2,*input2,skipp)) == NULL) {
+ if ((acc2 = input_header_gzip(&filterp,&restofheader2,nextchar2,
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
(*queryseq2) = (T) NULL;
nextchar2 = EOF;
- } else if ((nextchar2 = bzgetc(*input2)) == '\r' || nextchar2 == '\n') {
+ } else if ((nextchar2 = gzgetc(*input2)) == '\r' || nextchar2 == '\n') {
/* Process blank lines and loop again */
- while (nextchar2 != EOF && ((nextchar2 = bzgetc(*input2)) != '>')) {
+ while (nextchar2 != EOF && ((nextchar2 = gzgetc(*input2)) != '>')) {
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2);
+#endif
}
+#ifdef USE_MPI
+ if (nextchar2 != EOF) {
+ Filestring_putc(nextchar2,filestring2);
+ }
+#endif
(*queryseq2) = (T) NULL;
- } else if ((fulllength2 = input_oneline_bzip2(&nextchar2,&long_read_2,&(Read2[0]),*input2,
- /*possible_fasta_header_p*/true)) == 0) {
+ } else if ((fulllength2 = input_oneline_gzip(&nextchar2,&long_read_2,&(Read2[0]),
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,/*possible_fasta_header_p*/true)) == 0) {
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2); /* For first "else if" clause */
+#endif
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
- /* *nextchar = EOF; */
+ /* nextchar2 = EOF; */
(*queryseq2) = (T) NULL;
} else {
- queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_first_p,/*copy_acc_p*/false,skipp);
- (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_second_p,/*copy_acc_p*/false,skipp);
- FREE_IN(acc2);
- FREE_IN(restofheader2);
- }
-
- } else {
- /* Single-end: Either EOF, '>', or '+' */
- queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
- /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
- invert_first_p,/*copy_acc_p*/false,skipp);
- (*queryseq2) = (T) NULL;
- }
- }
-
- debug(printf("Returning queryseq with contents %s\n",queryseq1->contents));
-
- if (queryseq1 == (T) SKIPPED) {
- return (T) SKIPPED;
- } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
- return queryseq1;
- }
- }
-
- }
-}
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2); /* For first "else if" clause */
+#endif
+ if (*nextchar == '+') {
+ /* End 1 with a quality string */
+ skip_header_gzip(*input1,*nextchar);
+ *nextchar = gzgetc(*input1);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ quality_length = input_oneline_gzip(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength1) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength1,acc);
+ abort();
+ } else {
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+ } else {
+ /* End 1 without quality string */
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+
+ if (nextchar2 == '+') {
+ /* End 2 with a quality string */
+ skip_header_gzip(*input2,nextchar2);
+ nextchar2 = gzgetc(*input2);
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2);
+#endif
+ quality_length = input_oneline_gzip(&nextchar2,&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength2) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength2,acc2);
+ abort();
+ } else {
+ /* For FASTA, drop second accession */
+ (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ FREE_IN(acc2);
+ FREE_IN(restofheader2);
+ }
+ } else {
+ /* End 2 without quality string */
+ (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ FREE_IN(acc2);
+ FREE_IN(restofheader2);
+ }
+ }
+
+ } else {
+ /* Single-end: Either EOF, '>', or '+' */
+ if (*nextchar == '+') {
+ /* Single-end with a quality string */
+ skip_header_gzip(*input1,*nextchar);
+ *nextchar = gzgetc(*input1);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ quality_length = input_oneline_gzip(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength1) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength1,acc);
+ abort();
+ } else {
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+ } else {
+ /* Single-end without quality string */
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+ (*queryseq2) = (T) NULL;
+ }
+ }
+
+ if (queryseq1 == (T) SKIPPED) {
+ return (T) SKIPPED;
+ } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ debug(printf("Returning queryseq with contents %s\n",queryseq1->contents));
+ return queryseq1;
+ }
+ }
+
+ }
+}
#endif
+#ifdef HAVE_BZLIB
T
-Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FILE **input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p) {
+Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
+#ifdef USE_MPI
+ Filestring_T filestring1, Filestring_T filestring2,
+#endif
+ Bzip2_T *input1, Bzip2_T *input2,
+ char ***files, int *nfiles, bool skipp) {
+ T queryseq1;
+ int nextchar2;
+ char *acc, *restofheader, *acc2, *restofheader2;
+ char *long_read_1, *long_read_2, *long_quality;
+ int fulllength1, fulllength2, quality_length;
+ bool filterp;
+
+ while (1) {
+ queryseq1 = *queryseq2 = (T) NULL;
+ if (*input1 == NULL || *nextchar == EOF) { /* Was bzeof(*input1) */
+ if (*input1 != NULL) {
+ Bzip2_free(&(*input1));
+ *input1 = NULL;
+ }
+ if (*input2 != NULL) {
+ Bzip2_free(&(*input2));
+ *input2 = NULL;
+ }
+
+ if (*nfiles == 0) {
+ *nextchar = EOF;
+ return (T) NULL;
+
+ } else if (*nfiles == 1 || force_single_end_p == true) {
+ if ((*input1 = Bzip2_new((*files)[0])) == NULL) {
+ fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
+ (*files) += 1;
+ (*nfiles) -= 1;
+ *nextchar = EOF;
+ return (T) NULL;
+ } else {
+ *input2 = NULL;
+ (*files) += 1;
+ (*nfiles) -= 1;
+ nextchar2 = '\0';
+ }
+
+ } else {
+ while (*nfiles > 0 && (*input1 = Bzip2_new((*files)[0])) == NULL) {
+ fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
+ (*files)++;
+ (*nfiles)--;
+ }
+ if (*input1 == NULL) {
+ *nextchar = EOF;
+ return (T) NULL;
+ } else {
+ (*files)++;
+ (*nfiles)--;
+ *nextchar = '\0';
+ }
+ }
+ }
+
+ if (*nextchar == '\0') {
+ if ((*nextchar = Shortread_input_init_bzip2(*input1)) == EOF) {
+ return (T) NULL;
+ }
+ }
+
+ debug(printf("** Getting header\n"));
+ if ((acc = input_header_bzip2(&filterp,&restofheader,*nextchar,
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,skipp)) == NULL) {
+ /* fprintf(stderr,"No header\n"); */
+ /* File ends after >. Don't process, but loop again */
+ *nextchar = EOF;
+ } else if ((*nextchar = bzgetc(*input1)) == '\r' || *nextchar == '\n') {
+ /* Process blank lines and loop again */
+ while (*nextchar != EOF && ((*nextchar = bzgetc(*input1)) != '>')) {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ }
+#ifdef USE_MPI
+ if (*nextchar != EOF) {
+ Filestring_putc(*nextchar,filestring1);
+ }
+#endif
+ } else if ((fulllength1 = input_oneline_bzip2(&(*nextchar),&long_read_1,&(Read1[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/true)) == 0) {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1); /* For first "else if" clause */
+#endif
+ /* fprintf(stderr,"length is zero\n"); */
+ /* No sequence1. Don't process, but loop again */
+ /* *nextchar = EOF */
+ } else {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1); /* For first "else if" clause */
+#endif
+ /* queryseq1 is in Read1 */
+ /* See what is in next line */
+ if ((fulllength2 = input_oneline_bzip2(&(*nextchar),&long_read_2,&(Read2[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/true)) > 0) {
+ /* Paired-end, single file. queryseq1 is in Read1 and queryseq2 is in Read2 */
+ if (*nextchar == '+') {
+ /* Paired-end with quality strings */
+ skip_header_bzip2(*input1,*nextchar);
+ *nextchar = bzgetc(*input1);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ quality_length = input_oneline_bzip2(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength1) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength1,acc);
+ abort();
+ }
+
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ quality_length = input_oneline_bzip2(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength2) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength2,acc);
+ abort();
+ }
+
+ (*queryseq2) = Shortread_new(/*acc*/NULL,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength2,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ } else {
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ (*queryseq2) = Shortread_new(/*acc*/NULL,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength2,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ }
+
+ } else {
+ if (*input2 == NULL && *nfiles > 0 && force_single_end_p == false &&
+ (*input2 = Bzip2_new((*files)[0])) != NULL) {
+ (*files) += 1;
+ (*nfiles) -= 1;
+ nextchar2 = '\0';
+ }
+
+ if (*input2 != NULL) {
+ /* Paired-end in two files */
+ if ((acc2 = input_header_bzip2(&filterp,&restofheader2,nextchar2,
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,skipp)) == NULL) {
+ /* fprintf(stderr,"No header\n"); */
+ /* File ends after >. Don't process, but loop again */
+ (*queryseq2) = (T) NULL;
+ nextchar2 = EOF;
+ } else if ((nextchar2 = bzgetc(*input2)) == '\r' || nextchar2 == '\n') {
+ /* Process blank lines and loop again */
+ while (nextchar2 != EOF && ((nextchar2 = bzgetc(*input2)) != '>')) {
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2);
+#endif
+ }
+#ifdef USE_MPI
+ if (nextchar2 != EOF) {
+ Filestring_putc(nextchar2,filestring2);
+ }
+#endif
+ (*queryseq2) = (T) NULL;
+ } else if ((fulllength2 = input_oneline_bzip2(&nextchar2,&long_read_2,&(Read2[0]),
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,/*possible_fasta_header_p*/true)) == 0) {
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2); /* For first "else if" clause */
+#endif
+ /* fprintf(stderr,"length is zero\n"); */
+ /* No sequence1. Don't process, but loop again */
+ /* nextchar2 = EOF; */
+ (*queryseq2) = (T) NULL;
+ } else {
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2); /* For first "else if" clause */
+#endif
+ if (*nextchar == '+') {
+ /* End 1 with a quality string */
+ skip_header_bzip2(*input1,*nextchar);
+ *nextchar = bzgetc(*input1);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ quality_length = input_oneline_bzip2(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength1) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength1,acc);
+ abort();
+ } else {
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+ } else {
+ /* End 1 without quality string */
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+
+ if (nextchar2 == '+') {
+ /* End 2 with a quality string */
+ skip_header_bzip2(*input2,nextchar2);
+ nextchar2 = bzgetc(*input2);
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2);
+#endif
+ quality_length = input_oneline_bzip2(&nextchar2,&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength2) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength2,acc2);
+ abort();
+ } else {
+ /* For FASTA, drop second accession */
+ (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ FREE_IN(acc2);
+ FREE_IN(restofheader2);
+ }
+ } else {
+ /* End 2 without quality string */
+ (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_second_p,/*copy_acc_p*/false,skipp);
+ FREE_IN(acc2);
+ FREE_IN(restofheader2);
+ }
+ }
+
+ } else {
+ /* Single-end: Either EOF, '>', or '+' */
+ if (*nextchar == '+') {
+ /* Single-end with a quality string */
+ skip_header_bzip2(*input1,*nextchar);
+ *nextchar = bzgetc(*input1);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ quality_length = input_oneline_bzip2(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
+ if (quality_length != fulllength1) {
+ fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+ quality_length,fulllength1,acc);
+ abort();
+ } else {
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ Quality,long_quality,quality_length,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+ } else {
+ /* Single-end without quality string */
+ queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength1,
+ /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+ invert_first_p,/*copy_acc_p*/false,skipp);
+ }
+ (*queryseq2) = (T) NULL;
+ }
+ }
+
+ if (queryseq1 == (T) SKIPPED) {
+ return (T) SKIPPED;
+ } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+ debug(printf("Returning queryseq with contents %s\n",queryseq1->contents));
+ return queryseq1;
+ }
+ }
+
+ }
+}
+#endif
+
+
+/* Reads the next single-end or paired-end entry from uncompressed FASTQ text.
+ *
+ * When *input1 is NULL or *nextchar is EOF, any open streams are closed and
+ * the next name(s) are taken from *files, advancing *files and decrementing
+ * *nfiles: one file when *nfiles == 1 or force_single_end_p is true,
+ * otherwise two files per pair.  The routine then loops until it can return
+ *   - (T) NULL, with *nextchar == EOF, when no files remain or the next
+ *     file (or every remaining pair) could not be opened;
+ *   - (T) SKIPPED, when Shortread_new gives SKIPPED for the first end; or
+ *   - the first end, when it has an accession and non-zero length, with
+ *     *queryseq2 holding the second end (or NULL).
+ * *nchars1 and *nchars2 are incremented for each character fetched here with
+ * fgetc, and are also handed to the header/line input helpers.
+ *
+ * Paired-end opening: a pair is used only if both files open.  If the first
+ * opens and the second does not, the first is closed again, so it is neither
+ * leaked nor read as single-end input after "skipping both" was reported.
+ * The loop also requires two remaining names before touching (*files)[1]. */
+T
+Shortread_read_fastq_text (int *nextchar, int *nchars1, int *nchars2, T *queryseq2,
+                           FILE **input1, FILE **input2,
+                           char ***files, int *nfiles, bool skipp) {
+  T queryseq1;
+  int nextchar2 = '\0';         /* Can be anything but EOF */
+  char *acc, *restofheader;
+  char *long_read_1, *long_read_2, *long_quality;
+  int fulllength, quality_length;
+  bool filterp;
+  bool opened_p;                /* true once both files of a pair are open */
+
+  while (1) {
+    queryseq1 = *queryseq2 = (T) NULL;
+    if (*input1 == NULL || *nextchar == EOF) { /* was feof(input1) */
+      if (*input1 != NULL) {
+        debugf(fprintf(stderr,"Master closing input 1 using fclose\n"));
+        fclose(*input1);
+        *input1 = NULL;
+      }
+      if (*input2 != NULL) {
+        debugf(fprintf(stderr,"Master closing input 2 using fclose\n"));
+        fclose(*input2);
+        *input2 = NULL;
+      }
+
+      if (*nfiles == 0) {
+        *nextchar = EOF;
+        return (T) NULL;
+
+      } else if (*nfiles == 1 || force_single_end_p == true) {
+        if ((*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
+          fprintf(stderr,"Can't open file %s => skipping.\n",(*files)[0]);
+          (*files) += 1;
+          (*nfiles) -= 1;
+          *nextchar = EOF;
+          return (T) NULL;
+        } else {
+          debugf(fprintf(stderr,"Master opening input file 1\n"));
+          *input2 = NULL;
+          (*files) += 1;
+          (*nfiles) -= 1;
+          nextchar2 = '\0';
+        }
+
+      } else {
+        /* Paired-end: consume names two at a time until a pair opens */
+        opened_p = false;
+        while (opened_p == false && *nfiles >= 2) {
+          if ((*input1 = FOPEN_READ_TEXT((*files)[0])) != NULL &&
+              (*input2 = FOPEN_READ_TEXT((*files)[1])) != NULL) {
+            opened_p = true;
+          } else {
+            fprintf(stderr,"Can't open file %s or %s => skipping both.\n",
+                    (*files)[0],(*files)[1]);
+            if (*input1 != NULL) {
+              /* Second file failed: release the first one as well */
+              fclose(*input1);
+              *input1 = NULL;
+            }
+          }
+          (*files) += 2;
+          (*nfiles) -= 2;
+        }
+        if (opened_p == false) {
+          *nextchar = EOF;
+          return (T) NULL;
+        } else {
+          debugf(fprintf(stderr,"Master opening input files 1 and 2\n"));
+          *nextchar = '\0';
+        }
+      }
+    }
+
+    debug(printf("** Getting header\n"));
+    if ((acc = input_header_fastq(&(*nchars1),&filterp,&restofheader,*nextchar,*input1,skipp)) == NULL) {
+      /* fprintf(stderr,"No header\n"); */
+      /* File ends after >.  Don't process, but loop again */
+      *nextchar = EOF;
+    } else {
+      *nextchar = fgetc(*input1);
+      *nchars1 += 1;
+      if ((fulllength = input_oneline(&(*nextchar),&(*nchars1),&long_read_1,&(Read1[0]),*input1,
+                                      /*possible_fasta_header_p*/true)) == 0) {
+        FREE_IN(acc);
+        FREE_IN(restofheader);
+        /* fprintf(stderr,"length is zero\n"); */
+        /* No sequence1.  Don't process, but loop again */
+        /* *nextchar = EOF; */
+
+      } else if (*nextchar != '+') {
+        /* No quality */
+        queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength,
+                                  /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+                                  invert_first_p,/*copy_acc_p*/false,skipp);
+      } else {
+        skip_header(&(*nchars1),*input1,*nextchar);
+        *nextchar = fgetc(*input1);
+        *nchars1 += 1;
+        quality_length = input_oneline(&(*nextchar),&(*nchars1),&long_quality,&(Quality[0]),*input1,
+                                       /*possible_fasta_header_p*/false);
+        if (quality_length != fulllength) {
+          fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+                  quality_length,fulllength,acc);
+          abort();
+        } else {
+          /* Has quality */
+          queryseq1 = Shortread_new(acc,restofheader,filterp,Read1,long_read_1,fulllength,
+                                    Quality,long_quality,quality_length,barcode_length,
+                                    invert_first_p,/*copy_acc_p*/false,skipp);
+        }
+      }
+    }
+
+    /* fulllength is only examined when acc is non-NULL, i.e., after it was assigned above */
+    if (acc == NULL || fulllength == 0) {
+      /* Skip */
+    } else if (*input2 == NULL) {
+      *queryseq2 = (T) NULL;
+    } else {
+      if ((acc = input_header_fastq(&(*nchars2),&filterp,&restofheader,nextchar2,*input2,skipp)) == NULL) {
+        /* fprintf(stderr,"No header\n"); */
+        /* File ends after >.  Don't process, but loop again */
+        nextchar2 = EOF;
+      } else {
+        if (skipp == true) {
+          /* Do not check endings */
+        } else if (allow_paired_end_mismatch_p == true) {
+          /* Do not strip endings, and keep second accession */
+          FREE_IN(restofheader);
+        } else {
+          strip_illumina_acc_ending(queryseq1->acc,acc);
+          if (strcmp(queryseq1->acc,acc)) {
+            fprintf(stderr,"Paired-end accessions %s and %s do not match\n",queryseq1->acc,acc);
+            exit(9);
+          } else {
+            FREE_IN(acc);
+            FREE_IN(restofheader);
+            acc = (char *) NULL;
+          }
+        }
+        nextchar2 = fgetc(*input2);
+        *nchars2 += 1;
+        if ((fulllength = input_oneline(&nextchar2,&(*nchars2),&long_read_2,&(Read2[0]),*input2,
+                                        /*possible_fasta_header_p*/true)) == 0) {
+          FREE_IN(acc);
+          FREE_IN(restofheader);
+          /* fprintf(stderr,"length is zero\n"); */
+          /* No sequence2.  Don't process, but loop again */
+          /* nextchar2 = EOF; */
+
+        } else if (nextchar2 != '+') {
+          /* No quality */
+          (*queryseq2) = Shortread_new(acc,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength,
+                                       /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
+                                       invert_second_p,/*copy_acc_p*/false,skipp);
+        } else {
+          skip_header(&(*nchars2),*input2,nextchar2);
+          nextchar2 = fgetc(*input2);
+          *nchars2 += 1;
+          quality_length = input_oneline(&nextchar2,&(*nchars2),&long_quality,&(Quality[0]),*input2,
+                                         /*possible_fasta_header_p*/false);
+          if (quality_length != fulllength) {
+            fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
+                    quality_length,fulllength,acc);
+            abort();
+          } else {
+            /* Has quality */
+            (*queryseq2) = Shortread_new(acc,/*restofheader*/NULL,filterp,Read2,long_read_2,fulllength,
+                                         Quality,long_quality,quality_length,barcode_length,
+                                         invert_second_p,/*copy_acc_p*/false,skipp);
+
+          }
+        }
+      }
+    }
+
+    if (queryseq1 == (T) SKIPPED) {
+      return (T) SKIPPED;
+    } else if (queryseq1 != NULL && queryseq1->acc != NULL && queryseq1->fulllength > 0) {
+      return queryseq1;
+    }
+  }
+}
+
+
+#ifdef USE_MPI
+static T
+read_fastq_filecontents (int *nextchar, T *queryseq2,
+ char **filecontents1, char **filecontents2,
+#ifdef USE_MPI_FILE_INPUT
+ MPI_File *input1, MPI_File *input2, MPI_Comm workers_comm,
+#else
+ FILE **input1, FILE **input2,
+#endif
+ char ***files, int *nfiles, bool skipp) {
T queryseq1;
+ int nextchar2 = '\0';
char *acc, *restofheader;
char *long_read_1, *long_read_2, *long_quality;
- int nextchar2 = '\0';
int fulllength, quality_length;
bool filterp;
while (1) {
queryseq1 = *queryseq2 = (T) NULL;
- if (*input1 == NULL || feof(*input1)) {
+
+ if (*nextchar == '\0') {
if (*input1 != NULL) {
+#ifdef USE_MPI_FILE_INPUT
+ debugf(fprintf(stderr,"Slave closing input 1 using MPI_File_close\n"));
+ MPI_File_close(&(*input1));
+#else
+ debugf(fprintf(stderr,"Slave closing input 1 using fclose\n"));
fclose(*input1);
+#endif
*input1 = NULL;
}
+
if (*input2 != NULL) {
+#ifdef USE_MPI_FILE_INPUT
+ debugf(fprintf(stderr,"Slave closing input 2 using MPI_File_close\n"));
+ MPI_File_close(&(*input2));
+#else
+ debugf(fprintf(stderr,"Slave closing input 2 using fclose\n"));
fclose(*input2);
+#endif
*input2 = NULL;
}
if (*nfiles == 0) {
- *nextchar = EOF;
+ *nextchar = '\0';
return (T) NULL;
} else if (*nfiles == 1 || force_single_end_p == true) {
+#ifdef USE_MPI_FILE_INPUT
+ if ((*input1 = MPI_fopen((*files)[0],workers_comm)) == NULL) {
+ fprintf(stderr,"Can't open file %s => skipping.\n",(*files)[0]);
+ (*files) += 1;
+ (*nfiles) -= 1;
+ *nextchar = '\0';
+ return (T) NULL;
+ } else {
+ debugf(fprintf(stderr,"Slave opening input file 1\n"));
+ *input2 = NULL;
+ (*files) += 1;
+ (*nfiles) -= 1;
+ nextchar2 = '\0';
+ }
+#else
if ((*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
fprintf(stderr,"Can't open file %s => skipping.\n",(*files)[0]);
(*files) += 1;
@@ -2699,13 +4188,34 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
*nextchar = EOF;
return (T) NULL;
} else {
+ debugf(fprintf(stderr,"Slave opening input file 1\n"));
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- *nextchar = '\0';
+ nextchar2 = '\0';
}
+#endif
} else {
+#ifdef USE_MPI_FILE_INPUT
+ while (*nfiles > 0 &&
+ ((*input1 = MPI_fopen((*files)[0],workers_comm)) == NULL ||
+ (*input2 = MPI_fopen((*files)[1],workers_comm)) == NULL)) {
+ fprintf(stderr,"Can't open file %s or %s => skipping both.\n",
+ (*files)[0],(*files)[1]);
+ (*files) += 2;
+ (*nfiles) -= 2;
+ }
+ if (*input1 == NULL) {
+ *nextchar = '\0';
+ return (T) NULL;
+ } else {
+ debugf(fprintf(stderr,"Slave opening input files 1 and 2\n"));
+ (*files) += 2;
+ (*nfiles) -= 2;
+ *nextchar = '\0';
+ }
+#else
while (*nfiles > 0 &&
((*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL ||
(*input2 = FOPEN_READ_TEXT((*files)[1])) == NULL)) {
@@ -2718,27 +4228,29 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
*nextchar = EOF;
return (T) NULL;
} else {
+ debugf(fprintf(stderr,"Slave opening input files 1 and 2\n"));
(*files) += 2;
(*nfiles) -= 2;
*nextchar = '\0';
}
+#endif
}
}
debug(printf("** Getting header\n"));
- if ((acc = input_header_fastq(&filterp,&restofheader,*input1,skipp)) == NULL) {
+ if ((acc = input_header_fastq_filecontents(&filterp,&restofheader,*nextchar,&(*filecontents1),skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
- *nextchar = EOF;
+ *nextchar = '\0';
} else {
- *nextchar = fgetc(*input1);
- if ((fulllength = input_oneline(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
- /*possible_fasta_header_p*/true)) == 0) {
+ *nextchar = *(*filecontents1)++;
+ if ((fulllength = input_oneline_filecontents(&(*nextchar),&long_read_1,&(Read1[0]),&(*filecontents1),
+ /*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
/* fprintf(stderr,"length is zero\n"); */
/* No sequence1. Don't process, but loop again */
- /* *nextchar = EOF; */
+ /* *nextchar = '\0'; */
} else if (*nextchar != '+') {
/* No quality */
@@ -2746,10 +4258,10 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
/*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
invert_first_p,/*copy_acc_p*/false,skipp);
} else {
- skip_header(*input1);
- *nextchar = fgetc(*input1);
- quality_length = input_oneline(&(*nextchar),&long_quality,&(Quality[0]),*input1,
- /*possible_fasta_header_p*/false);
+ skip_header_filecontents(&(*filecontents1),*nextchar);
+ *nextchar = *(*filecontents1)++;
+ quality_length = input_oneline_filecontents(&(*nextchar),&long_quality,&(Quality[0]),&(*filecontents1),
+ /*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
quality_length,fulllength,acc);
@@ -2765,13 +4277,13 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
if (acc == NULL || fulllength == 0) {
/* Skip */
- } else if (*input2 == NULL) {
+ } else if (*filecontents2 == NULL) {
*queryseq2 = (T) NULL;
} else {
- if ((acc = input_header_fastq(&filterp,&restofheader,*input2,skipp)) == NULL) {
+ if ((acc = input_header_fastq_filecontents(&filterp,&restofheader,nextchar2,&(*filecontents2),skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
- nextchar2 = EOF;
+ nextchar2 = '\0';
} else {
if (skipp == true) {
/* Do not check endings */
@@ -2789,14 +4301,14 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
acc = (char *) NULL;
}
}
- nextchar2 = fgetc(*input2);
- if ((fulllength = input_oneline(&nextchar2,&long_read_2,&(Read2[0]),*input2,
- /*possible_fasta_header_p*/true)) == 0) {
+ nextchar2 = *(*filecontents2)++;
+ if ((fulllength = input_oneline_filecontents(&nextchar2,&long_read_2,&(Read2[0]),&(*filecontents2),
+ /*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
/* fprintf(stderr,"length is zero\n"); */
/* No sequence2. Don't process, but loop again */
- /* *nextchar = EOF; */
+ /* nextchar2 = '\0'; */
} else if (nextchar2 != '+') {
/* No quality */
@@ -2804,10 +4316,10 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
/*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
invert_second_p,/*copy_acc_p*/false,skipp);
} else {
- skip_header(*input2);
- nextchar2 = fgetc(*input2);
- quality_length = input_oneline(&nextchar2,&long_quality,&(Quality[0]),*input2,
- /*possible_fasta_header_p*/false);
+ skip_header_filecontents(&(*filecontents2),nextchar2);
+ nextchar2 = *(*filecontents2)++;
+ quality_length = input_oneline_filecontents(&nextchar2,&long_quality,&(Quality[0]),&(*filecontents2),
+ /*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
quality_length,fulllength,acc);
@@ -2830,23 +4342,27 @@ Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FIL
}
}
}
+#endif
#ifdef HAVE_ZLIB
T
-Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input1, gzFile *input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p) {
+Shortread_read_fastq_gzip (int *nextchar, T *queryseq2,
+#ifdef USE_MPI
+ Filestring_T filestring1, Filestring_T filestring2,
+#endif
+ gzFile *input1, gzFile *input2,
+ char ***files, int *nfiles, bool skipp) {
T queryseq1;
+ int nextchar2 = '\0';
char *acc, *restofheader;
char *long_read_1, *long_read_2, *long_quality;
- int nextchar2 = '\0';
int fulllength, quality_length;
bool filterp;
while (1) {
queryseq1 = *queryseq2 = (T) NULL;
- if (*input1 == NULL || gzeof(*input1)) {
+ if (*input1 == NULL || *nextchar == EOF) { /* was gzeof(*input1) */
if (*input1 != NULL) {
gzclose(*input1);
*input1 = NULL;
@@ -2872,7 +4388,7 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- *nextchar = '\0';
+ nextchar2 = '\0';
} else {
if ((*input1 = gzopen((*files)[0],"rb")) == NULL) {
@@ -2900,14 +4416,24 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
}
debug(printf("** Getting header\n"));
- if ((acc = input_header_fastq_gzip(&filterp,&restofheader,*input1,skipp)) == NULL) {
+ if ((acc = input_header_fastq_gzip(&filterp,&restofheader,*nextchar,
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process. */
*nextchar = EOF;
} else {
*nextchar = gzgetc(*input1);
- if ((fulllength = input_oneline_gzip(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
- /*possible_fasta_header_p*/true)) == 0) {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ if ((fulllength = input_oneline_gzip(&(*nextchar),&long_read_1,&(Read1[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
/* fprintf(stderr,"length is zero\n"); */
@@ -2920,10 +4446,16 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
/*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
invert_first_p,/*copy_acc_p*/false,skipp);
} else {
- skip_header_gzip(*input1);
+ skip_header_gzip(*input1,*nextchar);
*nextchar = gzgetc(*input1);
- quality_length = input_oneline_gzip(&(*nextchar),&long_quality,&(Quality[0]),*input1,
- /*possible_fasta_header_p*/false);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ quality_length = input_oneline_gzip(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
quality_length,fulllength,acc);
@@ -2942,7 +4474,11 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
} else if (*input2 == NULL) {
*queryseq2 = (T) NULL;
} else {
- if ((acc = input_header_fastq_gzip(&filterp,&restofheader,*input2,skipp)) == NULL) {
+ if ((acc = input_header_fastq_gzip(&filterp,&restofheader,nextchar2,
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
nextchar2 = EOF;
@@ -2964,13 +4500,19 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
}
}
nextchar2 = gzgetc(*input2);
- if ((fulllength = input_oneline_gzip(&nextchar2,&long_read_2,&(Read2[0]),*input2,
- /*possible_fasta_header_p*/true)) == 0) {
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2);
+#endif
+ if ((fulllength = input_oneline_gzip(&nextchar2,&long_read_2,&(Read2[0]),
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
/* fprintf(stderr,"length is zero\n"); */
/* No sequence2. Don't process, but loop again */
- /* *nextchar = EOF; */
+ /* nextchar2 = EOF; */
} else if (nextchar2 != '+') {
/* No quality */
@@ -2978,10 +4520,16 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
/*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
invert_second_p,/*copy_acc_p*/false,skipp);
} else {
- skip_header_gzip(*input2);
+ skip_header_gzip(*input2,nextchar2);
nextchar2 = gzgetc(*input2);
- quality_length = input_oneline_gzip(&nextchar2,&long_quality,&(Quality[0]),*input2,
- /*possible_fasta_header_p*/false);
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2);
+#endif
+ quality_length = input_oneline_gzip(&nextchar2,&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
quality_length,fulllength,acc);
@@ -3008,19 +4556,22 @@ Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input
#ifdef HAVE_BZLIB
T
-Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *input1, Bzip2_T *input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p) {
+Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2,
+#ifdef USE_MPI
+ Filestring_T filestring1, Filestring_T filestring2,
+#endif
+ Bzip2_T *input1, Bzip2_T *input2,
+ char ***files, int *nfiles, bool skipp) {
T queryseq1;
+ int nextchar2 = '\0';
char *acc, *restofheader;
char *long_read_1, *long_read_2, *long_quality;
- int nextchar2 = '\0';
int fulllength, quality_length;
bool filterp;
while (1) {
queryseq1 = *queryseq2 = (T) NULL;
- if (*input1 == NULL || bzeof(*input1)) {
+ if (*input1 == NULL || *nextchar == EOF) { /* Was bzeof(*input1) */
if (*input1 != NULL) {
Bzip2_free(&(*input1));
*input1 = NULL;
@@ -3042,7 +4593,7 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
- *nextchar = '\0';
+ nextchar2 = '\0';
} else {
if ((*input1 = Bzip2_new((*files)[0])) == NULL) {
@@ -3062,14 +4613,24 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
}
debug(printf("** Getting header\n"));
- if ((acc = input_header_fastq_bzip2(&filterp,&restofheader,*input1,skipp)) == NULL) {
+ if ((acc = input_header_fastq_bzip2(&filterp,&restofheader,*nextchar,
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process. */
*nextchar = EOF;
} else {
*nextchar = bzgetc(*input1);
- if ((fulllength = input_oneline_bzip2(&(*nextchar),&long_read_1,&(Read1[0]),*input1,
- /*possible_fasta_header_p*/true)) == 0) {
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ if ((fulllength = input_oneline_bzip2(&(*nextchar),&long_read_1,&(Read1[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
/* fprintf(stderr,"length is zero\n"); */
@@ -3082,10 +4643,16 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
/*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
invert_first_p,/*copy_acc_p*/false,skipp);
} else {
- skip_header_bzip2(*input1);
+ skip_header_bzip2(*input1,*nextchar);
*nextchar = bzgetc(*input1);
- quality_length = input_oneline_bzip2(&(*nextchar),&long_quality,&(Quality[0]),*input1,
- /*possible_fasta_header_p*/false);
+#ifdef USE_MPI
+ Filestring_putc(*nextchar,filestring1);
+#endif
+ quality_length = input_oneline_bzip2(&(*nextchar),&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring1,
+#endif
+ *input1,/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
quality_length,fulllength,acc);
@@ -3104,7 +4671,11 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
} else if (*input2 == NULL) {
*queryseq2 = (T) NULL;
} else {
- if ((acc = input_header_fastq_bzip2(&filterp,&restofheader,*input2,skipp)) == NULL) {
+ if ((acc = input_header_fastq_bzip2(&filterp,&restofheader,nextchar2,
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,skipp)) == NULL) {
/* fprintf(stderr,"No header\n"); */
/* File ends after >. Don't process, but loop again */
nextchar2 = EOF;
@@ -3126,13 +4697,19 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
}
}
nextchar2 = bzgetc(*input2);
- if ((fulllength = input_oneline_bzip2(&nextchar2,&long_read_2,&(Read2[0]),*input2,
- /*possible_fasta_header_p*/true)) == 0) {
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2);
+#endif
+ if ((fulllength = input_oneline_bzip2(&nextchar2,&long_read_2,&(Read2[0]),
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,/*possible_fasta_header_p*/true)) == 0) {
FREE_IN(acc);
FREE_IN(restofheader);
/* fprintf(stderr,"length is zero\n"); */
/* No sequence2. Don't process, but loop again */
- /* *nextchar = EOF; */
+ /* nextchar2 = EOF; */
} else if (nextchar2 != '+') {
/* No quality */
@@ -3140,10 +4717,16 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
/*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
invert_second_p,/*copy_acc_p*/false,skipp);
} else {
- skip_header_bzip2(*input2);
+ skip_header_bzip2(*input2,nextchar2);
nextchar2 = bzgetc(*input2);
- quality_length = input_oneline_bzip2(&nextchar2,&long_quality,&(Quality[0]),*input2,
- /*possible_fasta_header_p*/false);
+#ifdef USE_MPI
+ Filestring_putc(nextchar2,filestring2);
+#endif
+ quality_length = input_oneline_bzip2(&nextchar2,&long_quality,&(Quality[0]),
+#ifdef USE_MPI
+ filestring2,
+#endif
+ *input2,/*possible_fasta_header_p*/false);
if (quality_length != fulllength) {
fprintf(stderr,"Length %d of quality score differs from length %d of nucleotides in sequence %s\n",
quality_length,fulllength,acc);
@@ -3168,78 +4751,228 @@ Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *inp
#endif
+/* Never uses MPI_File.  Run by non-MPI version or by rank 0 in MPI version.
+ *
+ * Dispatches one read request to the reader matching the input: FASTQ or
+ * FASTA according to fastq_format_p, and within each format the gzip reader
+ * when *gzipped1 is non-NULL, else the bzip2 reader when *bzipped1 is
+ * non-NULL, else the plain-text reader.  The result of the chosen reader is
+ * returned unchanged.  In a DEBUG build the character counts and *nextchar
+ * are printed after a plain-text read. */
+T
+Shortread_read (int *nextchar, int *nchars1, int *nchars2, T *queryseq2,
+#ifdef USE_MPI
+                Filestring_T filestring1, Filestring_T filestring2,
+#endif
+                FILE **input1, FILE **input2,
+#ifdef HAVE_ZLIB
+                gzFile *gzipped1, gzFile *gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+                Bzip2_T *bzipped1, Bzip2_T *bzipped2,
+#endif
+                char ***files, int *nfiles, bool skipp) {
+  T queryseq1;
+
+  if (!fastq_format_p) {
+    /* FASTA input */
+#ifdef HAVE_ZLIB
+    if (*gzipped1 != NULL) {
+      return Shortread_read_fasta_gzip(nextchar,queryseq2,
+#ifdef USE_MPI
+                                       filestring1,filestring2,
+#endif
+                                       gzipped1,gzipped2,files,nfiles,skipp);
+    }
+#endif
+
+#ifdef HAVE_BZLIB
+    if (*bzipped1 != NULL) {
+      return Shortread_read_fasta_bzip2(nextchar,queryseq2,
+#ifdef USE_MPI
+                                        filestring1,filestring2,
+#endif
+                                        bzipped1,bzipped2,files,nfiles,skipp);
+    }
+#endif
+
+    queryseq1 = Shortread_read_fasta_text(nextchar,nchars1,nchars2,queryseq2,
+                                          input1,input2,files,nfiles,skipp);
+
+  } else {
+    /* FASTQ input */
+#ifdef HAVE_ZLIB
+    if (*gzipped1 != NULL) {
+      return Shortread_read_fastq_gzip(nextchar,queryseq2,
+#ifdef USE_MPI
+                                       filestring1,filestring2,
+#endif
+                                       gzipped1,gzipped2,files,nfiles,skipp);
+    }
+#endif
+
+#ifdef HAVE_BZLIB
+    if (*bzipped1 != NULL) {
+      return Shortread_read_fastq_bzip2(nextchar,queryseq2,
+#ifdef USE_MPI
+                                        filestring1,filestring2,
+#endif
+                                        bzipped1,bzipped2,files,nfiles,skipp);
+    }
+#endif
+
+    queryseq1 = Shortread_read_fastq_text(nextchar,nchars1,nchars2,queryseq2,
+                                          input1,input2,files,nfiles,skipp);
+  }
+
+  /* Only the plain-text readers reach this point */
+#ifdef DEBUG
+  printf("nchars1 %d, nchars2 %d, nextchar %c\n",*nchars1,*nchars2,*nextchar);
+#endif
+  return queryseq1;
+}
+
+
+#ifdef USE_MPI
+/* MPI-only entry point that reads from in-memory file contents.  Forwards
+ * every argument to read_fastq_filecontents when fastq_format_p is set and to
+ * read_fasta_filecontents otherwise, returning that reader's result.
+ * workers_comm exists, and is forwarded, only under USE_MPI_FILE_INPUT. */
+T
+Shortread_read_filecontents (int *nextchar, T *queryseq2,
+                             char **filecontents1, char **filecontents2,
+#ifdef USE_MPI_FILE_INPUT
+                             MPI_File *input1, MPI_File *input2, MPI_Comm workers_comm,
+#else
+                             FILE **input1, FILE **input2,
+#endif
+                             char ***files, int *nfiles, bool skipp) {
+
+  if (!fastq_format_p) {
+    /* FASTA input */
+    return read_fasta_filecontents(nextchar,queryseq2,filecontents1,filecontents2,
+                                   input1,input2,
+#ifdef USE_MPI_FILE_INPUT
+                                   workers_comm,
+#endif
+                                   files,nfiles,skipp);
+  }
+
+  /* FASTQ input */
+  return read_fastq_filecontents(nextchar,queryseq2,filecontents1,filecontents2,
+                                 input1,input2,
+#ifdef USE_MPI_FILE_INPUT
+                                 workers_comm,
+#endif
+                                 files,nfiles,skipp);
+}
+#endif
+
+
/* Calling procedure needs to print the initial ">", if desired.
   Writes one header line to fp: the accession of queryseq1, followed by
   "," and the accession of queryseq2 when queryseq2 is non-NULL and has an
   accession; then a space and queryseq1's restofheader when that string is
   non-NULL and non-empty; then a newline.  This hunk only replaces fprintf
   on a FILE * with FPRINTF on a Filestring_T; the output layout is the same. */
void
-Shortread_print_header (FILE *fp, T queryseq1, T queryseq2) {
+Shortread_print_header (Filestring_T fp, T queryseq1, T queryseq2) {
if (queryseq2 == NULL || queryseq2->acc == NULL) {
- fprintf(fp,"%s",queryseq1->acc);
+ FPRINTF(fp,"%s",queryseq1->acc);
} else {
- fprintf(fp,"%s,%s",queryseq1->acc,queryseq2->acc);
+ FPRINTF(fp,"%s,%s",queryseq1->acc,queryseq2->acc);
}
if (queryseq1->restofheader == NULL || queryseq1->restofheader[0] == '\0') {
/* Don't print restofheader */
} else {
- fprintf(fp," %s",queryseq1->restofheader);
+ FPRINTF(fp," %s",queryseq1->restofheader);
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
-void
-Shortread_print_query_singleend_fasta (FILE *fp, T queryseq, T headerseq) {
- fprintf(fp,">");
- Shortread_print_header(fp,headerseq,/*queryseq2*/NULL);
- /* fprintf(fp,"\n"); -- included in header */
- Shortread_print_oneline(fp,queryseq);
- fprintf(fp,"\n");
+/* Writes a header line to stderr: the accession of queryseq1, then "," and
+   the accession of queryseq2 if there is one, then a space and queryseq1's
+   restofheader if it is non-empty, then a newline.  The caller prints any
+   leading ">" itself. */
+static void
+stderr_header (T queryseq1, T queryseq2) {
+
+  if (queryseq2 != NULL && queryseq2->acc != NULL) {
+    /* Both ends carry an accession */
+    fprintf(stderr,"%s,%s",queryseq1->acc,queryseq2->acc);
+  } else {
+    fprintf(stderr,"%s",queryseq1->acc);
+  }
+
+  if (queryseq1->restofheader != NULL && queryseq1->restofheader[0] != '\0') {
+    fprintf(stderr," %s",queryseq1->restofheader);
+  }
+
+  fprintf(stderr,"\n");
return;
}
-void
-Shortread_print_query_singleend_fastq (FILE *fp, T queryseq, T headerseq) {
- fprintf(fp,"@");
- Shortread_print_header(fp,headerseq,/*queryseq2*/NULL);
- /* fprintf(fp,"\n"); -- included in header */
- Shortread_print_oneline(fp,queryseq);
- fprintf(fp,"\n");
+/* Writes the read on one line to stderr, without a trailing newline: the
+   fulllength characters of contents followed by the choplength characters of
+   chop.  Writes the literal "(null)" instead when the read has zero length
+   or its first character is whitespace. */
+static void
+stderr_oneline (T this) {
+  int i = 0;
+
+  /* isspace() is only defined for EOF and values representable as unsigned
+     char, so the (possibly signed) char is converted before the call */
+  if (this->fulllength == 0 || isspace((unsigned char) this->contents[0])) {
+    fprintf(stderr,"(null)");
+  } else {
+    for (i = 0; i < this->fulllength; i++) {
+      fprintf(stderr,"%c",this->contents[i]);
+    }
+    for (i = 0; i < this->choplength; i++) {
+      fprintf(stderr,"%c",this->chop[i]);
+    }
+  }
+  return;
+}
+
+static void
+stderr_oneline_revcomp (T this) {
+ int i = 0; /* Writes the read to stderr back to front, without a trailing
+               newline: contents from index fulllength-1 down to 0, then chop
+               from choplength-1 down to 0, each character translated through
+               complCode.  Unlike stderr_oneline, nothing is printed for a
+               zero-length read.  NOTE(review): complCode is indexed by a
+               char cast to int; presumably all sequence characters are
+               non-negative -- confirm against the table's definition. */
- if (queryseq->quality != NULL) {
- fprintf(fp,"+\n");
- Shortread_print_quality(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0,
- /*shift*/0,/*choppedp*/false);
- fprintf(fp,"\n");
+ for (i = this->fulllength-1; i >= 0; --i) {
+ fprintf(stderr,"%c",complCode[(int) this->contents[i]]);
+ }
+ for (i = this->choplength-1; i >= 0; --i) {
+ fprintf(stderr,"%c",complCode[(int) this->chop[i]]);
}
return;
}
+
+
+/* Writes a single-end query to stderr as a FASTA entry: ">" and the header
+   built from headerseq, then the sequence of queryseq on one line. */
+void
+Shortread_stderr_query_singleend_fasta (T queryseq, T headerseq) {
+  fputc('>',stderr);
+  stderr_header(headerseq,/*queryseq2*/NULL); /* ends the header line itself */
+
+  stderr_oneline(queryseq);
+  fputc('\n',stderr);
+}
+
void
-Shortread_print_query_pairedend_fasta (FILE *fp, T queryseq1, T queryseq2,
- bool invert_first_p, bool invert_second_p) {
- fprintf(fp,">");
- Shortread_print_header(fp,queryseq1,queryseq2);
- /* fprintf(fp,"\n"); -- included in header */
+Shortread_stderr_query_pairedend_fasta (T queryseq1, T queryseq2,
+ bool invert_first_p, bool invert_second_p) {
+ fprintf(stderr,">");
+ stderr_header(queryseq1,queryseq2);
+ /* fprintf(stderr,"\n"); -- included in header */
if (invert_first_p == true) {
- Shortread_print_oneline_revcomp(fp,queryseq1);
- fprintf(fp,"\n");
+ stderr_oneline_revcomp(queryseq1);
+ fprintf(stderr,"\n");
} else {
- Shortread_print_oneline(fp,queryseq1);
- fprintf(fp,"\n");
+ stderr_oneline(queryseq1);
+ fprintf(stderr,"\n");
}
if (invert_second_p == true) {
- Shortread_print_oneline_revcomp(fp,queryseq2);
- fprintf(fp,"\n");
+ stderr_oneline_revcomp(queryseq2);
+ fprintf(stderr,"\n");
} else {
- Shortread_print_oneline(fp,queryseq2);
- fprintf(fp,"\n");
+ stderr_oneline(queryseq2);
+ fprintf(stderr,"\n");
}
return;
@@ -3247,59 +4980,111 @@ Shortread_print_query_pairedend_fasta (FILE *fp, T queryseq1, T queryseq2,
void
-Shortread_print_query_pairedend_fastq (FILE *fp1, FILE *fp2, T queryseq1, T queryseq2,
- bool invert_first_p, bool invert_second_p) {
- /* First end */
- if (queryseq2->acc == NULL) {
- fprintf(fp1,"@%s/1\n",queryseq1->acc);
+Shortread_print_query_singleend (Filestring_T fp, T queryseq, T headerseq) {
+ if (fastq_format_p == true) {
+ /* FASTQ format */
+ FPRINTF(fp,"@");
+ Shortread_print_header(fp,headerseq,/*queryseq2*/NULL);
+ /* FPRINTF(fp,"\n"); -- included in header */
+ Shortread_print_oneline(fp,queryseq);
+ FPRINTF(fp,"\n");
+
+ if (queryseq->quality != NULL) {
+ FPRINTF(fp,"+\n");
+ Shortread_print_quality(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0,
+ /*shift*/0,/*choppedp*/false);
+ FPRINTF(fp,"\n");
+ }
+
} else {
- fprintf(fp2,"@%s\n",queryseq1->acc); /* Allowing paired-end name mismatch */
+ /* FASTA format */
+ FPRINTF(fp,">");
+ Shortread_print_header(fp,headerseq,/*queryseq2*/NULL);
+ Shortread_print_oneline(fp,queryseq);
+ FPRINTF(fp,"\n");
}
- if (invert_first_p == true) {
- Shortread_print_oneline_revcomp(fp1,queryseq1);
- fprintf(fp1,"\n");
- if (queryseq1->quality != NULL) {
- fprintf(fp1,"+\n");
- Shortread_print_quality_revcomp(fp1,queryseq1,/*hardclip_low*/0,/*hardclip_high*/0,
- /*shift*/0,/*choppedp*/false);
- fprintf(fp1,"\n");
+ return;
+}
+
+void
+Shortread_print_query_pairedend (Filestring_T fp1, Filestring_T fp2, T queryseq1, T queryseq2) {
+ if (fastq_format_p == true) {
+ /* FASTQ format */
+
+ /* First end */
+ if (queryseq2->acc == NULL) {
+ FPRINTF(fp1,"@%s/1\n",queryseq1->acc);
+ } else {
+ FPRINTF(fp2,"@%s\n",queryseq1->acc); /* Allowing paired-end name mismatch */
}
- } else {
- Shortread_print_oneline(fp1,queryseq1);
- fprintf(fp1,"\n");
- if (queryseq1->quality != NULL) {
- fprintf(fp1,"+\n");
- Shortread_print_quality(fp1,queryseq1,/*hardclip_low*/0,/*hardclip_high*/0,
- /*shift*/0,/*choppedp*/false);
- fprintf(fp1,"\n");
+
+ if (invert_first_p == true) {
+ Shortread_print_oneline_revcomp(fp1,queryseq1);
+ FPRINTF(fp1,"\n");
+ if (queryseq1->quality != NULL) {
+ FPRINTF(fp1,"+\n");
+ Shortread_print_quality_revcomp(fp1,queryseq1,/*hardclip_low*/0,/*hardclip_high*/0,
+ /*shift*/0,/*choppedp*/false);
+ FPRINTF(fp1,"\n");
+ }
+ } else {
+ Shortread_print_oneline(fp1,queryseq1);
+ FPRINTF(fp1,"\n");
+ if (queryseq1->quality != NULL) {
+ FPRINTF(fp1,"+\n");
+ Shortread_print_quality(fp1,queryseq1,/*hardclip_low*/0,/*hardclip_high*/0,
+ /*shift*/0,/*choppedp*/false);
+ FPRINTF(fp1,"\n");
+ }
}
- }
- /* Second end */
- if (queryseq2->acc == NULL) {
- fprintf(fp2,"@%s/2\n",queryseq1->acc); /* Acc stored only for first end, not second end */
- } else {
- fprintf(fp2,"@%s\n",queryseq2->acc); /* Allowing paired-end name mismatch */
- }
+ /* Second end */
+ if (queryseq2->acc == NULL) {
+ FPRINTF(fp2,"@%s/2\n",queryseq1->acc); /* Acc stored only for first end, not second end */
+ } else {
+ FPRINTF(fp2,"@%s\n",queryseq2->acc); /* Allowing paired-end name mismatch */
+ }
- if (invert_second_p == true) {
- Shortread_print_oneline_revcomp(fp2,queryseq2);
- fprintf(fp2,"\n");
- if (queryseq2->quality != NULL) {
- fprintf(fp2,"+\n");
- Shortread_print_quality_revcomp(fp2,queryseq2,/*hardclip_low*/0,/*hardclip_high*/0,
- /*shift*/0,/*chopped*/false);
- fprintf(fp2,"\n");
+ if (invert_second_p == true) {
+ Shortread_print_oneline_revcomp(fp2,queryseq2);
+ FPRINTF(fp2,"\n");
+ if (queryseq2->quality != NULL) {
+ FPRINTF(fp2,"+\n");
+ Shortread_print_quality_revcomp(fp2,queryseq2,/*hardclip_low*/0,/*hardclip_high*/0,
+ /*shift*/0,/*chopped*/false);
+ FPRINTF(fp2,"\n");
+ }
+ } else {
+ Shortread_print_oneline(fp2,queryseq2);
+ FPRINTF(fp2,"\n");
+ if (queryseq2->quality != NULL) {
+ FPRINTF(fp2,"+\n");
+ Shortread_print_quality(fp2,queryseq2,/*hardclip_low*/0,/*hardclip_high*/0,
+ /*shift*/0,/*choppedp*/false);
+ FPRINTF(fp2,"\n");
+ }
}
+
} else {
- Shortread_print_oneline(fp2,queryseq2);
- fprintf(fp2,"\n");
- if (queryseq2->quality != NULL) {
- fprintf(fp2,"+\n");
- Shortread_print_quality(fp2,queryseq2,/*hardclip_low*/0,/*hardclip_high*/0,
- /*shift*/0,/*choppedp*/false);
- fprintf(fp2,"\n");
+ /* FASTA format */
+ FPRINTF(fp1,">");
+ Shortread_print_header(fp1,queryseq1,queryseq2);
+
+ if (invert_first_p == true) {
+ Shortread_print_oneline_revcomp(fp1,queryseq1);
+ FPRINTF(fp1,"\n");
+ } else {
+ Shortread_print_oneline(fp1,queryseq1);
+ FPRINTF(fp1,"\n");
+ }
+
+ if (invert_second_p == true) {
+ Shortread_print_oneline_revcomp(fp1,queryseq2);
+ FPRINTF(fp1,"\n");
+ } else {
+ Shortread_print_oneline(fp1,queryseq2);
+ FPRINTF(fp1,"\n");
}
}
@@ -3308,31 +5093,31 @@ Shortread_print_query_pairedend_fastq (FILE *fp1, FILE *fp2, T queryseq1, T quer
void
-Shortread_print_oneline (FILE *fp, T this) {
+Shortread_print_oneline (Filestring_T fp, T this) {
int i = 0;
if (this->fulllength == 0 || isspace(this->contents[0])) {
- fprintf(fp,"(null)");
+ FPRINTF(fp,"(null)");
} else {
for (i = 0; i < this->fulllength; i++) {
- fprintf(fp,"%c",this->contents[i]);
+ FPRINTF(fp,"%c",this->contents[i]);
}
for (i = 0; i < this->choplength; i++) {
- fprintf(fp,"%c",this->chop[i]);
+ FPRINTF(fp,"%c",this->chop[i]);
}
}
return;
}
void
-Shortread_print_oneline_revcomp (FILE *fp, T this) {
+Shortread_print_oneline_revcomp (Filestring_T fp, T this) {
int i = 0;
for (i = this->fulllength-1; i >= 0; --i) {
- fprintf(fp,"%c",complCode[(int) this->contents[i]]);
+ FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
}
for (i = this->choplength-1; i >= 0; --i) {
- fprintf(fp,"%c",complCode[(int) this->chop[i]]);
+ FPRINTF(fp,"%c",complCode[(int) this->chop[i]]);
}
return;
@@ -3340,25 +5125,33 @@ Shortread_print_oneline_revcomp (FILE *fp, T this) {
void
-Shortread_print_chopped (FILE *fp, T this, int hardclip_low, int hardclip_high) {
+Shortread_print_chopped_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
+#ifdef PRINT_INDIVIDUAL_CHARS
int i;
+#endif
if (this->fulllength == 0 || isspace(this->contents[0])) {
- fprintf(fp,"(null)");
+ FPRINTF(fp,"\t(null)");
} else {
+#ifdef PRINT_INDIVIDUAL_CHARS
+ FPRINTF(fp,"\t");
for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
- fprintf(fp,"%c",this->contents[i]);
+ FPRINTF(fp,"%c",this->contents[i]);
}
+#else
+ FPRINTF(fp,"\t%.*s",this->fulllength - hardclip_high - hardclip_low,&(this->contents[hardclip_low]));
+#endif
}
return;
}
void
-Shortread_print_chopped_revcomp (FILE *fp, T this, int hardclip_low, int hardclip_high) {
+Shortread_print_chopped_revcomp_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
int i;
+ FPRINTF(fp,"\t");
for (i = this->fulllength - 1 - hardclip_low; i >= hardclip_high; --i) {
- fprintf(fp,"%c",complCode[(int) this->contents[i]]);
+ FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
}
return;
@@ -3366,66 +5159,124 @@ Shortread_print_chopped_revcomp (FILE *fp, T this, int hardclip_low, int hardcli
/* For samprint XH field */
void
-Shortread_print_chopped_end (FILE *fp, T this, int hardclip_low, int hardclip_high) {
+Shortread_print_chopped_end (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
+#ifdef PRINT_INDIVIDUAL_CHARS
int i;
+#endif
if (hardclip_low > 0) {
+#ifdef PRINT_INDIVIDUAL_CHARS
for (i = 0; i < hardclip_low; i++) {
- fprintf(fp,"%c",this->contents[i]);
+ FPRINTF(fp,"%c",this->contents[i]);
}
+#else
+ FPRINTF(fp,"%.*s",hardclip_low,&(this->contents[0]));
+#endif
return;
} else {
+#ifdef PRINT_INDIVIDUAL_CHARS
for (i = this->fulllength - hardclip_high; i < this->fulllength; i++) {
- fprintf(fp,"%c",this->contents[i]);
+ FPRINTF(fp,"%c",this->contents[i]);
}
+#else
+ FPRINTF(fp,"%.*s",hardclip_high,&(this->contents[this->fulllength - hardclip_high]));
+#endif
return;
}
}
/* For samprint XH field */
void
-Shortread_print_chopped_end_revcomp (FILE *fp, T this, int hardclip_low, int hardclip_high) {
+Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
int i;
if (hardclip_low > 0) {
for (i = this->fulllength - 1; i >= this->fulllength - hardclip_low; --i) {
- fprintf(fp,"%c",complCode[(int) this->contents[i]]);
+ FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
}
return;
} else {
for (i = hardclip_high - 1; i >= 0; --i) {
- fprintf(fp,"%c",complCode[(int) this->contents[i]]);
+ FPRINTF(fp,"%c",complCode[(int) this->contents[i]]);
+ }
+ return;
+ }
+}
+
+
+/* For samprint XI field */
+void
+Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
+#ifdef PRINT_INDIVIDUAL_CHARS
+ int i;
+#endif
+
+ if (hardclip_low > 0) {
+#ifdef PRINT_INDIVIDUAL_CHARS
+ for (i = 0; i < hardclip_low; i++) {
+ FPRINTF(fp,"%c",this->quality[i]);
+ }
+#else
+ FPRINTF(fp,"%.*s",hardclip_low,&(this->quality[0]));
+#endif
+ return;
+
+ } else {
+#ifdef PRINT_INDIVIDUAL_CHARS
+ for (i = this->fulllength - hardclip_high; i < this->fulllength; i++) {
+ FPRINTF(fp,"%c",this->quality[i]);
}
+#else
+ FPRINTF(fp,"%.*s",hardclip_high,&(this->quality[this->fulllength - hardclip_high]));
+#endif
return;
}
}
+/* For samprint XI field */
+void
+Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
+ int i;
+
+ if (hardclip_low > 0) {
+ for (i = this->fulllength - 1; i >= this->fulllength - hardclip_low; --i) {
+ FPRINTF(fp,"%c",this->quality[i]);
+ }
+ return;
+
+ } else {
+ for (i = hardclip_high - 1; i >= 0; --i) {
+ FPRINTF(fp,"%c",this->quality[i]);
+ }
+ return;
+ }
+}
void
-Shortread_print_barcode (FILE *fp, T this) {
+Shortread_print_barcode (Filestring_T fp, T this) {
if (this->barcode != NULL) {
- fprintf(fp,"\tXB:Z:%s",this->barcode);
+ FPRINTF(fp,"\tXB:Z:%s",this->barcode);
}
return;
}
void
-Shortread_print_chop (FILE *fp, T this, bool invertp) {
+Shortread_print_chop (Filestring_T fp, T this, bool invertp) {
int i;
if (this->chop != NULL) {
- fprintf(fp,"\tXP:Z:");
+ FPRINTF(fp,"\tXP:Z:");
if (invertp == false) {
- fprintf(fp,"%s",this->chop);
+ FPRINTF(fp,"%s",this->chop);
} else {
for (i = this->choplength - 1; i >= 0; i--) {
- fprintf(fp,"%c",complCode[(int) this->chop[i]]);
+ FPRINTF(fp,"%c",complCode[(int) this->chop[i]]);
}
}
}
@@ -3435,23 +5286,23 @@ Shortread_print_chop (FILE *fp, T this, bool invertp) {
void
-Shortread_print_chop_symbols (FILE *fp, T this) {
+Shortread_print_chop_symbols (Filestring_T fp, T this) {
int i;
for (i = 0; i < this->choplength; i++) {
- fprintf(fp,"*");
+ FPRINTF(fp,"*");
}
return;
}
void
-Shortread_print_quality (FILE *fp, T this, int hardclip_low, int hardclip_high,
+Shortread_print_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
int shift, bool show_chopped_p) {
int i;
int c;
if (this->quality == NULL) {
- fprintf(fp,"*");
+ FPRINTF(fp,"*");
} else {
for (i = hardclip_low; i < this->fulllength - hardclip_high; i++) {
if ((c = this->quality[i] + shift) <= 32) {
@@ -3459,7 +5310,7 @@ Shortread_print_quality (FILE *fp, T this, int hardclip_low, int hardclip_high,
shift,this->quality[i]);
abort();
} else {
- fprintf(fp,"%c",c);
+ FPRINTF(fp,"%c",c);
}
}
@@ -3471,7 +5322,7 @@ Shortread_print_quality (FILE *fp, T this, int hardclip_low, int hardclip_high,
shift,this->chop_quality[i]);
abort();
} else {
- fprintf(fp,"%c",c);
+ FPRINTF(fp,"%c",c);
}
}
}
@@ -3482,13 +5333,13 @@ Shortread_print_quality (FILE *fp, T this, int hardclip_low, int hardclip_high,
}
void
-Shortread_print_quality_revcomp (FILE *fp, T this, int hardclip_low, int hardclip_high,
+Shortread_print_quality_revcomp (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
int shift, bool show_chopped_p) {
int i;
int c;
if (this->quality == NULL) {
- fprintf(fp,"*");
+ FPRINTF(fp,"*");
} else {
for (i = this->fulllength - 1 - hardclip_low; i >= hardclip_high; --i) {
if ((c = this->quality[i] + shift) <= 32) {
@@ -3496,7 +5347,7 @@ Shortread_print_quality_revcomp (FILE *fp, T this, int hardclip_low, int hardcli
shift,this->quality[i]);
abort();
} else {
- fprintf(fp,"%c",c);
+ FPRINTF(fp,"%c",c);
}
}
@@ -3508,7 +5359,7 @@ Shortread_print_quality_revcomp (FILE *fp, T this, int hardclip_low, int hardcli
shift,this->chop_quality[i]);
abort();
} else {
- fprintf(fp,"%c",c);
+ FPRINTF(fp,"%c",c);
}
}
}
@@ -3518,21 +5369,21 @@ Shortread_print_quality_revcomp (FILE *fp, T this, int hardclip_low, int hardcli
}
void
-Shortread_print_oneline_uc (FILE *fp, T this) {
+Shortread_print_oneline_uc (Filestring_T fp, T this) {
int i = 0;
for (i = 0; i < this->fulllength; i++) {
- fprintf(fp,"%c",this->contents_uc[i]);
+ FPRINTF(fp,"%c",this->contents_uc[i]);
}
return;
}
void
-Shortread_print_oneline_revcomp_uc (FILE *fp, T this) {
+Shortread_print_oneline_revcomp_uc (Filestring_T fp, T this) {
int i = 0;
for (i = this->fulllength-1; i >= 0; --i) {
- fprintf(fp,"%c",complCode[(int) this->contents_uc[i]]);
+ FPRINTF(fp,"%c",complCode[(int) this->contents_uc[i]]);
}
return;
}
diff --git a/src/shortread.h b/src/shortread.h
index f487c76..706dc2c 100644
--- a/src/shortread.h
+++ b/src/shortread.h
@@ -1,8 +1,17 @@
-/* $Id: shortread.h 149319 2014-09-30 02:15:42Z twu $ */
+/* $Id: shortread.h 157572 2015-01-28 00:05:22Z twu $ */
#ifndef SHORTREAD_INCLUDED
#define SHORTREAD_INCLUDED
+#ifdef HAVE_CONFIG_H
+#include <config.h> /* For HAVE_ZLIB, HAVE_BZLIB, USE_MPI_FILE_INPUT */
+#endif
+
#include <stdio.h>
#include "bool.h"
+#include "filestring.h"
+
+#if defined(USE_MPI) && defined(USE_MPI_FILE_INPUT)
+#include <mpi.h>
+#endif
#ifdef HAVE_ZLIB
#include <zlib.h>
@@ -19,7 +28,8 @@ typedef struct T *T;
extern void
Shortread_setup (int acc_fieldi_start_in, int acc_fieldi_end_in,
bool force_singled_end_p_in, bool filter_chastity_p_in,
- bool allow_paired_end_mismatch_p_in);
+ bool allow_paired_end_mismatch_p_in, bool fastq_format_p_in,
+ int barcode_length_in, bool invert_first_p_in, bool invert_second_p_in);
extern char *
Shortread_accession (T this);
@@ -30,8 +40,13 @@ Shortread_filterp (T this);
extern bool
Shortread_invertedp (T this);
+#if 0
+extern unsigned long long **
+Shortread_input_divide (bool *fastq_format_p, char **files, int nfiles, int naliquots);
+#endif
+
extern int
-Shortread_input_init (FILE *fp);
+Shortread_input_init (int *nchars, FILE *fp);
#ifdef HAVE_ZLIB
extern int
@@ -91,82 +106,136 @@ Shortread_new (char *acc, char *restofheader, bool filterp,
int barcode_length, bool invertp, bool copy_acc_p, bool skipp);
extern T
-Shortread_read_fasta_shortreads (int *nextchar, T *queryseq2, FILE **input1, FILE **input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p);
-extern T
-Shortread_read_fastq_shortreads (int *nextchar, T *queryseq2, FILE **input1, FILE **input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p);
+Shortread_read_fastq_text (int *nextchar, int *nchars1, int *nchars2, T *queryseq2,
+ FILE **input1, FILE **input2,
+ char ***files, int *nfiles, bool skipp);
#ifdef HAVE_ZLIB
extern T
-Shortread_read_fasta_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input1, gzFile *input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p);
+Shortread_read_fastq_gzip (int *nextchar, T *queryseq2,
+#ifdef USE_MPI
+ Filestring_T filestring1, Filestring_T filestring2,
+#endif
+ gzFile *input1, gzFile *input2,
+ char ***files, int *nfiles, bool skipp);
+#endif
+
+#ifdef HAVE_BZLIB
+extern T
+Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2,
+#ifdef USE_MPI
+ Filestring_T filestring1, Filestring_T filestring2,
+#endif
+ Bzip2_T *input1, Bzip2_T *input2,
+ char ***files, int *nfiles, bool skipp);
+#endif
+
+extern T
+Shortread_read_fasta_text (int *nextchar, int *nchars1, int *nchars2, T *queryseq2,
+ FILE **input1, FILE **input2,
+ char ***files, int *nfiles, bool skipp);
+#ifdef HAVE_ZLIB
extern T
-Shortread_read_fastq_shortreads_gzip (int *nextchar, T *queryseq2, gzFile *input1, gzFile *input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p);
+Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
+#ifdef USE_MPI
+ Filestring_T filestring1, Filestring_T filestring2,
+#endif
+ gzFile *input1, gzFile *input2,
+ char ***files, int *nfiles, bool skipp);
#endif
#ifdef HAVE_BZLIB
extern T
-Shortread_read_fasta_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *input1, Bzip2_T *input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p);
+Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
+#ifdef USE_MPI
+ Filestring_T filestring1, Filestring_T filestring2,
+#endif
+ Bzip2_T *input1, Bzip2_T *input2,
+ char ***files, int *nfiles, bool skipp);
+#endif
+
+
extern T
-Shortread_read_fastq_shortreads_bzip2 (int *nextchar, T *queryseq2, Bzip2_T *input1, Bzip2_T *input2,
- char ***files, int *nfiles, bool skipp,
- int barcode_length, bool invert_first_p, bool invert_second_p);
+Shortread_read (int *nextchar, int *nchars1, int *nchars2, T *queryseq2,
+#ifdef USE_MPI
+ Filestring_T filestring1, Filestring_T filestring2,
#endif
+ FILE **input1, FILE **input2,
+#ifdef HAVE_ZLIB
+ gzFile *gzipped1, gzFile *gzipped2,
+#endif
+#ifdef HAVE_BZLIB
+ Bzip2_T *bzipped1, Bzip2_T *bzipped2,
+#endif
+ char ***files, int *nfiles, bool skipp);
+#ifdef USE_MPI
+extern T
+Shortread_read_filecontents (int *nextchar, T *queryseq2,
+ char **filecontents1, char **filecontents2,
+#ifdef USE_MPI_FILE_INPUT
+ MPI_File *input1, MPI_File *input2, MPI_Comm workers_comm,
+#else
+ FILE **input1, FILE **input2,
+#endif
+ char ***files, int *nfiles, bool skipp);
+#endif
extern void
-Shortread_print_header (FILE *fp, T queryseq1, T queryseq2);
+Shortread_print_header (Filestring_T fp, T queryseq1, T queryseq2);
extern void
-Shortread_print_query_singleend_fasta (FILE *fp, T queryseq, T headerseq);
+Shortread_stderr_query_singleend_fasta (T queryseq, T headerseq);
extern void
-Shortread_print_query_singleend_fastq (FILE *fp, T queryseq, T headerseq);
+Shortread_stderr_query_pairedend_fasta (T queryseq1, T queryseq2,
+ bool invert_first_p, bool invert_second_p);
+
extern void
-Shortread_print_query_pairedend_fasta (FILE *fp, T queryseq1, T queryseq2,
- bool invert_first_p, bool invert_second_p);
+Shortread_print_query_singleend_fastq (Filestring_T fp, T queryseq, T headerseq);
extern void
-Shortread_print_query_pairedend_fastq (FILE *fp1, FILE *fp2, T queryseq1, T queryseq2,
+Shortread_print_query_pairedend_fastq (Filestring_T fp1, Filestring_T fp2, T queryseq1, T queryseq2,
bool invert_first_p, bool invert_second_p);
extern void
-Shortread_print_oneline (FILE *fp, T this);
+Shortread_print_query_singleend (Filestring_T fp, T queryseq, T headerseq);
extern void
-Shortread_print_oneline_revcomp (FILE *fp, T this);
+Shortread_print_query_pairedend (Filestring_T fp1, Filestring_T fp2, T queryseq1, T queryseq2);
extern void
-Shortread_print_chopped (FILE *fp, T this, int hardclip_low, int hardclip_high);
+Shortread_print_oneline (Filestring_T fp, T this);
+extern void
+Shortread_print_oneline_revcomp (Filestring_T fp, T this);
+
+extern void
+Shortread_print_chopped_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
+extern void
+Shortread_print_chopped_revcomp_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
+extern void
+Shortread_print_chopped_end (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
extern void
-Shortread_print_chopped_revcomp (FILE *fp, T this, int hardclip_low, int hardclip_high);
+Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
extern void
-Shortread_print_chopped_end (FILE *fp, T this, int hardclip_low, int hardclip_high);
+Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
extern void
-Shortread_print_chopped_end_revcomp (FILE *fp, T this, int hardclip_low, int hardclip_high);
+Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high);
extern void
-Shortread_print_barcode (FILE *fp, T this);
+Shortread_print_barcode (Filestring_T fp, T this);
extern void
-Shortread_print_chop (FILE *fp, T this, bool invertp);
+Shortread_print_chop (Filestring_T fp, T this, bool invertp);
extern void
-Shortread_print_chop_symbols (FILE *fp, T this);
+Shortread_print_chop_symbols (Filestring_T fp, T this);
extern void
-Shortread_print_quality (FILE *fp, T this, int hardclip_low, int hardclip_high,
+Shortread_print_quality (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
int shift, bool show_chopped_p);
extern void
-Shortread_print_quality_revcomp (FILE *fp, T this, int hardclip_low, int hardclip_high,
+Shortread_print_quality_revcomp (Filestring_T fp, T this, int hardclip_low, int hardclip_high,
int shift, bool show_chopped_p);
extern void
-Shortread_print_oneline_uc (FILE *fp, T this);
+Shortread_print_oneline_uc (Filestring_T fp, T this);
extern void
-Shortread_print_oneline_revcomp_uc (FILE *fp, T this);
+Shortread_print_oneline_revcomp_uc (Filestring_T fp, T this);
#undef T
#endif
diff --git a/src/snpindex.c b/src/snpindex.c
index 8734d46..bf07662 100644
--- a/src/snpindex.c
+++ b/src/snpindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: snpindex.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: snpindex.c 161940 2015-03-25 20:36:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1024,6 +1024,8 @@ main (int argc, char *argv[]) {
#ifdef EXTRA_ALLOCATION
Positionsptr_T npositions;
#endif
+
+ int shmid;
unsigned char *ref_positions8_high;
UINT4 *ref_positions8_low;
UINT8 *snp_positions8, *ref_positions8;
@@ -1153,7 +1155,7 @@ main (int argc, char *argv[]) {
IIT_dump_divstrings(stderr,snps_iit);
genome = Genome_new(sourcedir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- /*uncompressedp*/false,/*access*/USE_MMAP_ONLY);
+ /*uncompressedp*/false,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
/* Copy genome */
nblocks = Genome_totallength(genome)/32U;
@@ -1313,10 +1315,11 @@ main (int argc, char *argv[]) {
ref_positions8_low = (UINT4 *) Access_mmap(&ref_positions_low_fd,&ref_positions_low_len,
filenames->positions_low_filename,sizeof(UINT4),/*randomp*/false);
#else
- ref_positions8_high = (unsigned char *) Access_allocated(&ref_positions_high_len,&seconds,
- filenames->positions_high_filename,sizeof(unsigned char));
- ref_positions8_low = (UINT4 *) Access_allocated(&ref_positions_low_len,&seconds,
- filenames->positions_low_filename,sizeof(UINT4));
+ ref_positions8_high = (unsigned char *) Access_allocate(&shmid,&ref_positions_high_len,&seconds,
+ filenames->positions_high_filename,sizeof(unsigned char),
+ /*sharedp*/false);
+ ref_positions8_low = (UINT4 *) Access_allocate(&shmid,&ref_positions_low_len,&seconds,
+ filenames->positions_low_filename,sizeof(UINT4),/*sharedp*/false);
#endif
/* Unpack */
totalcounts = ref_positions_high_len/sizeof(unsigned char);
@@ -1343,8 +1346,8 @@ main (int argc, char *argv[]) {
ref_positions4 = (UINT4 *) Access_mmap(&ref_positions_low_fd,&ref_positions_low_len,
filenames->positions_low_filename,sizeof(UINT4),/*randomp*/false);
#else
- ref_positions4 = (UINT4 *) Access_allocated(&ref_positions_low_len,&seconds,
- filenames->positions_low_filename,sizeof(UINT4));
+ ref_positions4 = (UINT4 *) Access_allocate(&shmid,&ref_positions_low_len,&seconds,
+ filenames->positions_low_filename,sizeof(UINT4),/*sharedp*/false);
#endif
}
diff --git a/src/splice.c b/src/splice.c
index 90aad69..3e68052 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 154778 2014-12-06 03:32:33Z twu $";
+static char rcsid[] = "$Id: splice.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -19,7 +19,12 @@ static char rcsid[] = "$Id: splice.c 154778 2014-12-06 03:32:33Z twu $";
#define LOWPROB_SUPPORT 20
+#if 0
+/* Creates issues with ambiguous substrings */
#define LOCALSPLICING_NMATCHES_SLOP 1
+#else
+#define LOCALSPLICING_NMATCHES_SLOP 0
+#endif
#define LOCALSPLICING_PROB_SLOP 0.05
@@ -80,6 +85,600 @@ sufficient_splice_prob_local (int support, int nmismatches, double spliceprob) {
and values greater than 0 represent a known site. Need to subtract
1 to obtain joffset + j. */
+/* Called only by sarray-read.c, where plusp is always true */
+int
+Splice_resolve_sense (int *best_knowni_i, int *best_knowni_j,
+ int *best_nmismatches_i, int *best_nmismatches_j,
+ double *best_prob_i, double *best_prob_j,
+
+ Univcoord_T segmenti_left, Univcoord_T segmentj_left,
+ Univcoord_T segmenti_chroffset, Univcoord_T segmentj_chroffset,
+
+ int querystart, int queryend, int querylength, Compress_T query_compress,
+ int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
+ int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
+ int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
+ int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
+ int segmenti_donor_nknown, int segmentj_acceptor_nknown,
+ int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
+ int splicing_penalty, int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p) {
+ int best_splice_pos = -1, splice_pos_start, splice_pos_end, splice_pos, i, j;
+
+ int best_nmismatches, nmismatches;
+ int best_segmenti_nmismatches, best_segmentj_nmismatches, segmenti_nmismatches, segmentj_nmismatches;
+ Univcoord_T best_donor_splicecoord, best_acceptor_splicecoord;
+ int best_donor_knowni, best_acceptor_knowni;
+ double best_prob, best_donor_prob, best_acceptor_prob, probi, probj;
+ /* bool sufficient1p, sufficient2p; */
+
+ int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
+ int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
+ int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+
+#ifdef HAVE_ALLOCA
+ int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
+#else
+ int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1];
+ int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1];
+#endif
+
+
+ debug1(printf("Splice_resolve_sense: Getting genome at lefti %u and leftj %u (diff: %d), range %d..%d\n",
+ segmenti_left,segmentj_left,segmentj_left-segmenti_left,querystart,queryend));
+
+ *best_knowni_i = *best_knowni_j = -1;
+ *best_nmismatches_i = *best_nmismatches_j = 0;
+ *best_prob_i = *best_prob_j = 0.0;
+
+ splice_pos_start = querystart;
+ splice_pos_end = queryend;
+
+ if (plusp == true) {
+ /* Originally from plus strand. No complement. */
+ /* Sense (End 1 to End 2) or Antisense (End 5 to End 6) */
+ if (novelsplicingp && segmenti_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
+ donori_nsites = Genome_donor_positions(donor_positions_alloc,donor_knowni_alloc,
+ segmenti_donor_knownpos,segmenti_donor_knowni,
+ segmenti_left,splice_pos_start,splice_pos_end);
+ donori_positions = donor_positions_alloc;
+ donori_knowni = donor_knowni_alloc;
+ } else {
+ donori_nsites = segmenti_donor_nknown;
+ donori_positions = segmenti_donor_knownpos;
+ donori_knowni = segmenti_donor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d donori sites:",donori_nsites);
+ for (i = 0; i < donori_nsites; i++) {
+ printf(" %d",donori_positions[i]);
+ if (donori_knowni[i] >= 0) {
+ printf(" (%d)",donori_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+
+ if (novelsplicingp && segmentj_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
+ acceptorj_nsites = Genome_acceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
+ segmentj_acceptor_knownpos,segmentj_acceptor_knowni,
+ segmentj_left,splice_pos_start,splice_pos_end);
+ acceptorj_positions = acceptor_positions_alloc;
+ acceptorj_knowni = acceptor_knowni_alloc;
+ } else {
+ acceptorj_nsites = segmentj_acceptor_nknown;
+ acceptorj_positions = segmentj_acceptor_knownpos;
+ acceptorj_knowni = segmentj_acceptor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d acceptorj sites:",acceptorj_nsites);
+ for (i = 0; i < acceptorj_nsites; i++) {
+ printf(" %d",acceptorj_positions[i]);
+ if (acceptorj_knowni[i] >= 0) {
+ printf(" (%d)",acceptorj_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+
+ best_nmismatches = max_mismatches_allowed;
+ best_prob = 0.0;
+
+ i = j = 0;
+ while (i < donori_nsites && j < acceptorj_nsites) {
+ if ((splice_pos = donori_positions[i]) < acceptorj_positions[j]) {
+ i++;
+ } else if (splice_pos > acceptorj_positions[j]) {
+ j++;
+ } else {
+ segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/querystart,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p);
+ segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/queryend,
+ plusp,genestrand,first_read_p);
+ if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
+ if (donori_knowni[i] >= 0) {
+ probi = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probi = Maxent_hr_donor_prob(segmenti_left + splice_pos,segmenti_chroffset);
+ }
+
+ if (acceptorj_knowni[j] >= 0) {
+ probj = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probj = Maxent_hr_acceptor_prob(segmentj_left + splice_pos,segmentj_chroffset);
+ }
+
+ debug1(
+ if (plusp == true) {
+ printf("plus sense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
+ } else {
+ printf("minus antisense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
+ });
+
+
+#if 0
+ sufficient1p = sufficient_splice_prob_local(/*support*/splice_pos,segmenti_nmismatches,probi);
+ sufficient2p = sufficient_splice_prob_local(/*support*/querylength - splice_pos,segmentj_nmismatches,probj);
+#endif
+
+ /* if (sufficient1p && sufficient2p) { */
+ if (nmismatches < best_nmismatches ||
+ (nmismatches == best_nmismatches && probi + probj > best_prob)) {
+ /* Success */
+ best_nmismatches = nmismatches;
+ best_prob = probi + probj;
+
+ /* best_donor_splicecoord = segmenti_left + splice_pos; */
+ /* best_acceptor_splicecoord = segmentj_left + splice_pos; */
+ *best_knowni_i = donori_knowni[i];
+ *best_knowni_j = acceptorj_knowni[j];
+ *best_prob_i = probi; /* donor_prob */
+ *best_prob_j = probj; /* acceptor_prob */
+ best_splice_pos = splice_pos;
+ *best_nmismatches_i = segmenti_nmismatches;
+ *best_nmismatches_j = segmentj_nmismatches;
+ }
+ /* } */
+ }
+ i++;
+ j++;
+ }
+ }
+
+ } else {
+ /* minus */
+ /* Originally from minus strand. Complement. */
+ /* Antisense (End 7 to End 8) or Sense (End 3 to End 4) */
+ if (novelsplicingp && segmenti_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
+ antiacceptori_nsites = Genome_antiacceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
+ segmenti_antiacceptor_knownpos,segmenti_antiacceptor_knowni,
+ segmenti_left,splice_pos_start,splice_pos_end);
+ antiacceptori_positions = acceptor_positions_alloc;
+ antiacceptori_knowni = acceptor_knowni_alloc;
+ } else {
+ antiacceptori_nsites = segmenti_antiacceptor_nknown;
+ antiacceptori_positions = segmenti_antiacceptor_knownpos;
+ antiacceptori_knowni = segmenti_antiacceptor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d antiacceptori sites:",antiacceptori_nsites);
+ for (i = 0; i < antiacceptori_nsites; i++) {
+ printf(" %d",antiacceptori_positions[i]);
+ if (antiacceptori_knowni[i] >= 0) {
+ printf(" (%d)",antiacceptori_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+
+ if (novelsplicingp && segmentj_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
+ antidonorj_nsites = Genome_antidonor_positions(donor_positions_alloc,donor_knowni_alloc,
+ segmentj_antidonor_knownpos,segmentj_antidonor_knowni,
+ segmentj_left,splice_pos_start,splice_pos_end);
+ antidonorj_positions = donor_positions_alloc;
+ antidonorj_knowni = donor_knowni_alloc;
+ } else {
+ antidonorj_nsites = segmentj_antidonor_nknown;
+ antidonorj_positions = segmentj_antidonor_knownpos;
+ antidonorj_knowni = segmentj_antidonor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d antidonorj sites:",antidonorj_nsites);
+ for (i = 0; i < antidonorj_nsites; i++) {
+ printf(" %d",antidonorj_positions[i]);
+ if (antidonorj_knowni[i] >= 0) {
+ printf(" (%d)",antidonorj_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+
+ best_nmismatches = max_mismatches_allowed;
+ best_prob = 0.0;
+
+ i = j = 0;
+ while (i < antiacceptori_nsites && j < antidonorj_nsites) {
+ if ((splice_pos = antiacceptori_positions[i]) < antidonorj_positions[j]) {
+ i++;
+ } else if (splice_pos > antidonorj_positions[j]) {
+ j++;
+ } else {
+ segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/querystart,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p);
+ segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/queryend,
+ plusp,genestrand,first_read_p);
+ if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
+ if (antiacceptori_knowni[i] >= 0) {
+ probi = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probi = Maxent_hr_antiacceptor_prob(segmenti_left + splice_pos,segmenti_chroffset);
+ }
+
+ if (antidonorj_knowni[j] >= 0) {
+ probj = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probj = Maxent_hr_antidonor_prob(segmentj_left + splice_pos,segmentj_chroffset);
+ }
+
+ debug1(
+ if (plusp == true) {
+ printf("plus antisense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
+ } else {
+ printf("minus sense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
+ });
+
+#if 0
+ sufficient1p = sufficient_splice_prob_local(/*support*/splice_pos,segmenti_nmismatches,probi);
+ sufficient2p = sufficient_splice_prob_local(/*support*/querylength - splice_pos,segmentj_nmismatches,probj);
+#endif
+
+ /* if (sufficient1p && sufficient2p) { */
+ if (nmismatches < best_nmismatches ||
+ (nmismatches == best_nmismatches && probi + probj > best_prob)) {
+ /* Success */
+ best_nmismatches = nmismatches;
+ best_prob = probi + probj;
+
+ /* best_donor_splicecoord = segmentj_left + splice_pos; */
+ /* best_acceptor_splicecoord = segmenti_left + splice_pos; */
+ *best_knowni_j = antidonorj_knowni[j];
+ *best_knowni_i = antiacceptori_knowni[i];
+ *best_prob_j = probj; /* donor_prob */
+ *best_prob_i = probi;
+ best_splice_pos = splice_pos;
+ *best_nmismatches_j = segmentj_nmismatches;
+ *best_nmismatches_i = segmenti_nmismatches;
+ }
+ /* } */
+ }
+ i++;
+ j++;
+ }
+ }
+ }
+
+ if (*best_prob_i > 0.95 && *best_prob_j > 0.70) {
+ debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ return best_splice_pos;
+ } else if (*best_prob_i > 0.70 && *best_prob_j > 0.95) {
+ debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ return best_splice_pos;
+ } else if (*best_prob_i > 0.80 && *best_prob_j > 0.85) {
+ debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ return best_splice_pos;
+ } else {
+ debug1(printf("Not returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ return -1;
+ }
+}
+
+
+/* Picks the query position at which to splice segmenti to segmentj, writing the chosen sites' known indices, per-segment mismatch counts and probabilities through the best_* pointers; returns that position, or -1 if no candidate clears the probability thresholds. Called only by sarray-read.c, where plusp is always true */
+int
+Splice_resolve_antisense (int *best_knowni_i, int *best_knowni_j,
+ int *best_nmismatches_i, int *best_nmismatches_j,
+ double *best_prob_i, double *best_prob_j,
+
+ Univcoord_T segmenti_left, Univcoord_T segmentj_left,
+ Univcoord_T segmenti_chroffset, Univcoord_T segmentj_chroffset,
+
+ int querystart, int queryend, int querylength, Compress_T query_compress,
+ int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
+ int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
+ int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
+ int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
+ int segmenti_donor_nknown, int segmentj_acceptor_nknown,
+ int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
+ int splicing_penalty, int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p) {
+ int best_splice_pos = -1, splice_pos_start, splice_pos_end, splice_pos, i, j;
+ /* NOTE(review): best_segmenti_nmismatches, best_segmentj_nmismatches, best_donor_splicecoord, best_acceptor_splicecoord, best_donor_knowni, best_acceptor_knowni, best_donor_prob and best_acceptor_prob below, and the splicing_penalty parameter, are never used in this function */
+ int best_nmismatches, nmismatches;
+ int best_segmenti_nmismatches, best_segmentj_nmismatches, segmenti_nmismatches, segmentj_nmismatches;
+ Univcoord_T best_donor_splicecoord, best_acceptor_splicecoord;
+ int best_donor_knowni, best_acceptor_knowni;
+ double best_prob, best_donor_prob, best_acceptor_prob, probi, probj;
+ /* bool sufficient1p, sufficient2p; */
+ /* Per-branch site lists: number of sites, their query positions, and a known-site index (a value >= 0 is treated as a known site below) */
+ int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
+ int *donori_positions, *acceptorj_positions, *antiacceptori_positions, *antidonorj_positions;
+ int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
+ /* Scratch arrays filled by the genome site scans: sized querylength+1 via alloca when available, otherwise fixed at MAX_READLENGTH+1 */
+#ifdef HAVE_ALLOCA
+ int *donor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ int *acceptor_positions_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ int *donor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
+ int *acceptor_knowni_alloc = (int *) alloca((querylength+1)*sizeof(int));
+#else
+ int donor_positions_alloc[MAX_READLENGTH+1], acceptor_positions_alloc[MAX_READLENGTH+1];
+ int donor_knowni_alloc[MAX_READLENGTH+1], acceptor_knowni_alloc[MAX_READLENGTH+1];
+#endif
+
+ debug1(printf("Splice_resolve_antisense: Getting genome at lefti %u and leftj %u (diff: %d), range %d..%d\n",
+ segmenti_left,segmentj_left,segmentj_left-segmenti_left,querystart,queryend));
+ /* Initialize the outputs so that a search with no candidates reports no known site, zero mismatches and zero probability */
+ *best_knowni_i = *best_knowni_j = -1;
+ *best_nmismatches_i = *best_nmismatches_j = 0;
+ *best_prob_i = *best_prob_j = 0.0;
+ /* Range of query positions handed to the genome site scans below */
+ splice_pos_start = querystart;
+ splice_pos_end = queryend;
+
+ if (plusp == false) {
+ /* minus */
+ /* Originally from plus strand. No complement. */
+ /* Sense (End 1 to End 2) or Antisense (End 5 to End 6) */
+ if (novelsplicingp && segmenti_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
+ donori_nsites = Genome_donor_positions(donor_positions_alloc,donor_knowni_alloc,
+ segmenti_donor_knownpos,segmenti_donor_knowni,
+ segmenti_left,splice_pos_start,splice_pos_end);
+ donori_positions = donor_positions_alloc;
+ donori_knowni = donor_knowni_alloc;
+ } else {
+ donori_nsites = segmenti_donor_nknown;
+ donori_positions = segmenti_donor_knownpos;
+ donori_knowni = segmenti_donor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d donori sites:",donori_nsites);
+ for (i = 0; i < donori_nsites; i++) {
+ printf(" %d",donori_positions[i]);
+ if (donori_knowni[i] >= 0) {
+ printf(" (%d)",donori_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+ /* As for the donors above: scan the genome for acceptor sites on segmentj when novelsplicingp is set and the coordinate clears the model margin, else use the caller's known sites */
+ if (novelsplicingp && segmentj_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
+ acceptorj_nsites = Genome_acceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
+ segmentj_acceptor_knownpos,segmentj_acceptor_knowni,
+ segmentj_left,splice_pos_start,splice_pos_end);
+ acceptorj_positions = acceptor_positions_alloc;
+ acceptorj_knowni = acceptor_knowni_alloc;
+ } else {
+ acceptorj_nsites = segmentj_acceptor_nknown;
+ acceptorj_positions = segmentj_acceptor_knownpos;
+ acceptorj_knowni = segmentj_acceptor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d acceptorj sites:",acceptorj_nsites);
+ for (i = 0; i < acceptorj_nsites; i++) {
+ printf(" %d",acceptorj_positions[i]);
+ if (acceptorj_knowni[i] >= 0) {
+ printf(" (%d)",acceptorj_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+ /* Start from the mismatch budget, so only candidates with nmismatches <= max_mismatches_allowed are considered */
+ best_nmismatches = max_mismatches_allowed;
+ best_prob = 0.0;
+ /* Walk both position lists in step (assumes each list is in ascending order -- TODO confirm); only positions present in both lists are evaluated */
+ i = j = 0;
+ while (i < donori_nsites && j < acceptorj_nsites) {
+ if ((splice_pos = donori_positions[i]) < acceptorj_positions[j]) {
+ i++;
+ } else if (splice_pos > acceptorj_positions[j]) {
+ j++;
+ } else {
+ segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/querystart,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p);
+ segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/queryend,
+ plusp,genestrand,first_read_p);
+ if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
+ if (donori_knowni[i] >= 0) {
+ probi = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probi = Maxent_hr_donor_prob(segmenti_left + splice_pos,segmenti_chroffset);
+ }
+
+ if (acceptorj_knowni[j] >= 0) {
+ probj = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probj = Maxent_hr_acceptor_prob(segmentj_left + splice_pos,segmentj_chroffset);
+ }
+
+ debug1(
+ if (plusp == true) {
+ printf("plus sense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
+ } else {
+ printf("minus antisense splice_pos %d, i.donor %f, j.acceptor %f\n",splice_pos,probi,probj);
+ });
+
+#if 0
+ sufficient1p = sufficient_splice_prob_local(/*support*/splice_pos,segmenti_nmismatches,probi);
+ sufficient2p = sufficient_splice_prob_local(/*support*/querylength - splice_pos,segmentj_nmismatches,probj);
+#endif
+ /* Prefer fewer total mismatches; break ties by the larger summed site probability */
+ /* if (sufficient1p && sufficient2p) { */
+ if (nmismatches < best_nmismatches ||
+ (nmismatches == best_nmismatches && probi + probj > best_prob)) {
+ /* Success */
+ best_nmismatches = nmismatches;
+ best_prob = probi + probj;
+
+ /* best_donor_splicecoord = segmenti_left + splice_pos; */
+ /* best_acceptor_splicecoord = segmentj_left + splice_pos; */
+ *best_knowni_i = donori_knowni[i];
+ *best_knowni_j = acceptorj_knowni[j];
+ *best_prob_i = probi; /* donor_prob */
+ *best_prob_j = probj; /* acceptor_prob */
+ best_splice_pos = splice_pos;
+ *best_nmismatches_i = segmenti_nmismatches;
+ *best_nmismatches_j = segmentj_nmismatches;
+ }
+ /* } */
+ }
+ i++;
+ j++;
+ }
+ }
+
+ } else {
+ /* plus */
+ /* Originally from minus strand. Complement. */
+ /* Antisense (End 7 to End 8) or Sense (End 3 to End 4) */
+ if (novelsplicingp && segmenti_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
+ antiacceptori_nsites = Genome_antiacceptor_positions(acceptor_positions_alloc,acceptor_knowni_alloc,
+ segmenti_antiacceptor_knownpos,segmenti_antiacceptor_knowni,
+ segmenti_left,splice_pos_start,splice_pos_end);
+ antiacceptori_positions = acceptor_positions_alloc;
+ antiacceptori_knowni = acceptor_knowni_alloc;
+ } else {
+ antiacceptori_nsites = segmenti_antiacceptor_nknown;
+ antiacceptori_positions = segmenti_antiacceptor_knownpos;
+ antiacceptori_knowni = segmenti_antiacceptor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d antiacceptori sites:",antiacceptori_nsites);
+ for (i = 0; i < antiacceptori_nsites; i++) {
+ printf(" %d",antiacceptori_positions[i]);
+ if (antiacceptori_knowni[i] >= 0) {
+ printf(" (%d)",antiacceptori_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+ /* As for the antiacceptors above: scan the genome for antidonor sites on segmentj when novelsplicingp is set and the coordinate clears the model margin, else use the caller's known sites */
+ if (novelsplicingp && segmentj_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
+ antidonorj_nsites = Genome_antidonor_positions(donor_positions_alloc,donor_knowni_alloc,
+ segmentj_antidonor_knownpos,segmentj_antidonor_knowni,
+ segmentj_left,splice_pos_start,splice_pos_end);
+ antidonorj_positions = donor_positions_alloc;
+ antidonorj_knowni = donor_knowni_alloc;
+ } else {
+ antidonorj_nsites = segmentj_antidonor_nknown;
+ antidonorj_positions = segmentj_antidonor_knownpos;
+ antidonorj_knowni = segmentj_antidonor_knowni;
+ }
+
+#ifdef DEBUG1
+ printf("Found %d antidonorj sites:",antidonorj_nsites);
+ for (i = 0; i < antidonorj_nsites; i++) {
+ printf(" %d",antidonorj_positions[i]);
+ if (antidonorj_knowni[i] >= 0) {
+ printf(" (%d)",antidonorj_knowni[i]);
+ }
+ }
+ printf("\n");
+#endif
+ /* Start from the mismatch budget, so only candidates with nmismatches <= max_mismatches_allowed are considered */
+ best_nmismatches = max_mismatches_allowed;
+ best_prob = 0.0;
+ /* Walk both position lists in step (assumes each list is in ascending order -- TODO confirm); only positions present in both lists are evaluated */
+ i = j = 0;
+ while (i < antiacceptori_nsites && j < antidonorj_nsites) {
+ if ((splice_pos = antiacceptori_positions[i]) < antidonorj_positions[j]) {
+ i++;
+ } else if (splice_pos > antidonorj_positions[j]) {
+ j++;
+ } else {
+ segmenti_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmenti_left,/*pos5*/querystart,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p);
+ segmentj_nmismatches = Genome_count_mismatches_substring(query_compress,/*left*/segmentj_left,/*pos5*/splice_pos,/*pos3*/queryend,
+ plusp,genestrand,first_read_p);
+ if ((nmismatches = segmenti_nmismatches + segmentj_nmismatches) <= best_nmismatches) {
+ if (antiacceptori_knowni[i] >= 0) {
+ probi = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probi = Maxent_hr_antiacceptor_prob(segmenti_left + splice_pos,segmenti_chroffset);
+ }
+
+ if (antidonorj_knowni[j] >= 0) {
+ probj = 1.0; /* Needs to be 1.0 for output */
+ } else {
+ probj = Maxent_hr_antidonor_prob(segmentj_left + splice_pos,segmentj_chroffset);
+ }
+
+ debug1(
+ if (plusp == true) {
+ printf("plus antisense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
+ } else {
+ printf("minus sense splice_pos %d, j.donor %f, i.acceptor %f\n",splice_pos,probj,probi);
+ });
+
+#if 0
+ sufficient1p = sufficient_splice_prob_local(/*support*/splice_pos,segmenti_nmismatches,probi);
+ sufficient2p = sufficient_splice_prob_local(/*support*/querylength - splice_pos,segmentj_nmismatches,probj);
+#endif
+ /* Prefer fewer total mismatches; break ties by the larger summed site probability */
+ /* if (sufficient1p && sufficient2p) { */
+ if (nmismatches < best_nmismatches ||
+ (nmismatches == best_nmismatches && probi + probj > best_prob)) {
+ /* Success */
+ best_nmismatches = nmismatches;
+ best_prob = probi + probj;
+
+ /* best_donor_splicecoord = segmentj_left + splice_pos; */
+ /* best_acceptor_splicecoord = segmenti_left + splice_pos; */
+ *best_knowni_j = antidonorj_knowni[j];
+ *best_knowni_i = antiacceptori_knowni[i];
+ *best_prob_j = probj; /* donor_prob */
+ *best_prob_i = probi; /* acceptor_prob */
+ best_splice_pos = splice_pos;
+ *best_nmismatches_j = segmentj_nmismatches;
+ *best_nmismatches_i = segmenti_nmismatches;
+ }
+ /* } */
+ }
+ i++;
+ j++;
+ }
+ }
+ }
+ /* Accept the best candidate only if its (probi, probj) pair clears one of three threshold combinations; otherwise return -1. NOTE(review): on the -1 path the best_* outputs still hold the rejected candidate's values */
+ if (*best_prob_i > 0.95 && *best_prob_j > 0.70) {
+ debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ return best_splice_pos;
+ } else if (*best_prob_i > 0.70 && *best_prob_j > 0.95) {
+ debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ return best_splice_pos;
+ } else if (*best_prob_i > 0.80 && *best_prob_j > 0.85) {
+ debug1(printf("Returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ return best_splice_pos;
+ } else {
+ debug1(printf("Not returning %d with probi %f and probj %f\n",best_splice_pos,*best_prob_i,*best_prob_j));
+ return -1;
+ }
+}
+
+
+
+/* Note: knowni holds joffset + j + 1, so 0 represents no known site
+ and values greater than 0 represent a known site. Need to subtract
+ 1 to obtain joffset + j. */
+
List_T
Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lowprob,
@@ -109,7 +708,7 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
Univcoord_T best_donor_splicecoord, best_acceptor_splicecoord;
int best_donor_knowni, best_acceptor_knowni;
double best_prob, best_donor_prob, best_acceptor_prob, probi, probj;
- bool sufficient1p, sufficient2p, orig_plusp, sensep;
+ bool sufficient1p, sufficient2p, orig_plusp;
int sensedir;
int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
@@ -386,20 +985,19 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
best_prob,best_splice_pos,best_donor_splicecoord,best_acceptor_splicecoord));
if (orig_plusp == true) {
/* Originally from plus strand. No complement. */
- sensep = (plusp == true) ? true : false;
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
assert(sensedir == SENSE_FORWARD);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
best_splice_pos,best_segmenti_nmismatches,
best_donor_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
best_splice_pos,best_segmentj_nmismatches,
best_acceptor_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
if (donor == NULL || acceptor == NULL) {
@@ -417,7 +1015,8 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
if (sufficient1p && sufficient2p) {
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
- donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ donor,acceptor,best_donor_prob,best_acceptor_prob,
+ /*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -436,7 +1035,8 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
} else if (sufficient1p || sufficient2p) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
- donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ donor,acceptor,best_donor_prob,best_acceptor_prob,
+ /*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -453,20 +1053,19 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
} else {
/* Originally from minus strand. Complement. */
- sensep = (plusp == true) ? false : true;
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
assert(sensedir == SENSE_FORWARD);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
best_splice_pos,best_segmentj_nmismatches,
best_donor_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
best_splice_pos,best_segmenti_nmismatches,
best_acceptor_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
if (donor == NULL || acceptor == NULL) {
@@ -483,7 +1082,8 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
if (sufficient1p && sufficient2p) {
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
- donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ donor,acceptor,best_donor_prob,best_acceptor_prob,
+ /*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -502,7 +1102,8 @@ Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lo
} else if (sufficient1p || sufficient2p) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
- donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ donor,acceptor,best_donor_prob,best_acceptor_prob,
+ /*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -555,7 +1156,7 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
Univcoord_T best_donor_splicecoord, best_acceptor_splicecoord;
int best_donor_knowni, best_acceptor_knowni;
double best_prob, best_donor_prob, best_acceptor_prob, probi, probj;
- bool sufficient1p, sufficient2p, orig_plusp, sensep;
+ bool sufficient1p, sufficient2p, orig_plusp;
int sensedir;
int donori_nsites, acceptorj_nsites, antiacceptori_nsites, antidonorj_nsites;
@@ -832,20 +1433,19 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
best_prob,best_splice_pos,best_donor_splicecoord,best_acceptor_splicecoord));
if (orig_plusp == true) {
/* Originally from plus strand. No complement. */
- sensep = (plusp == true) ? true : false;
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
assert(sensedir == SENSE_ANTI);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
best_splice_pos,best_segmenti_nmismatches,
best_donor_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
best_splice_pos,best_segmentj_nmismatches,
best_acceptor_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
if (donor == NULL || acceptor == NULL) {
@@ -863,7 +1463,8 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
if (sufficient1p && sufficient2p) {
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
- donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ donor,acceptor,best_donor_prob,best_acceptor_prob,
+ /*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -882,7 +1483,8 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
} else if (sufficient1p || sufficient2p) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmenti_nmismatches,best_segmentj_nmismatches,
- donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ donor,acceptor,best_donor_prob,best_acceptor_prob,
+ /*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -899,20 +1501,19 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
} else {
/* Originally from minus strand. Complement. */
- sensep = (plusp == true) ? false : true;
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
assert(sensedir == SENSE_ANTI);
donor = Substring_new_donor(best_donor_splicecoord,best_donor_knowni,
best_splice_pos,best_segmentj_nmismatches,
best_donor_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
acceptor = Substring_new_acceptor(best_acceptor_splicecoord,best_acceptor_knowni,
best_splice_pos,best_segmenti_nmismatches,
best_acceptor_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
if (donor == NULL || acceptor == NULL) {
@@ -929,7 +1530,8 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
if (sufficient1p && sufficient2p) {
*nhits += 1;
return List_push(hits,(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
- donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ donor,acceptor,best_donor_prob,best_acceptor_prob,
+ /*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -948,7 +1550,8 @@ Splice_solve_single_antisense (int *found_score, int *nhits, List_T hits, List_T
} else if (sufficient1p || sufficient2p) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_splice(&(*found_score),best_segmentj_nmismatches,best_segmenti_nmismatches,
- donor,acceptor,/*distance*/segmentj_left - segmenti_left,
+ donor,acceptor,best_donor_prob,best_acceptor_prob,
+ /*distance*/segmentj_left - segmenti_left,
/*shortdistancep*/true,splicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -1005,7 +1608,7 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
int best_donor1_knowni, best_acceptor1_knowni, best_donor2_knowni, best_acceptor2_knowni;
double best_prob, best_donor1_prob, best_acceptor1_prob, best_donor2_prob, best_acceptor2_prob,
probi, proba, probb, probj;
- bool sufficient1p, sufficient2p, sufficient3p, sufficient4p, orig_plusp, sensep, matchp;
+ bool sufficient1p, sufficient2p, sufficient3p, sufficient4p, orig_plusp, matchp;
int sensedir;
int donori_nsites, acceptora_nsites, donorb_nsites, acceptorj_nsites,
@@ -1439,13 +2042,12 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
debug2(printf("best_prob = %f at splice_pos %d and %d\n",best_prob,best_splice_pos_1,best_splice_pos_2));
if (orig_plusp == true) {
/* Originally from plus strand. No complement. */
- sensep = (plusp == true) ? true : false;
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
donor = Substring_new_donor(best_donor1_splicecoord,best_donor1_knowni,
best_splice_pos_1,best_segmenti_nmismatches,
best_donor1_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
shortexon = Substring_new_shortexon(best_acceptor1_splicecoord,best_acceptor1_knowni,
@@ -1454,13 +2056,13 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
/*acceptor_prob*/best_acceptor1_prob,/*donor_prob*/best_donor2_prob,
/*left*/segmentm_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,/*acceptor_ambp*/false,/*donor_ambp*/false,
+ sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength);
acceptor = Substring_new_acceptor(best_acceptor2_splicecoord,best_acceptor2_knowni,
best_splice_pos_2,best_segmentj_nmismatches,
best_acceptor2_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
if (donor == NULL || shortexon == NULL || acceptor == NULL) {
@@ -1480,6 +2082,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) {
*nhits += 1;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
+ best_donor1_prob,/*shortexonA_prob*/best_acceptor1_prob,
+ /*shortexonD_prob*/best_donor2_prob,best_acceptor2_prob,
/*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
@@ -1487,7 +2091,7 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,sarrayp));
+ splicing_penalty,querylength,first_read_p,sensedir,sarrayp));
} else if (subs_or_indels_p == true) {
/* Don't alter hits */
if (donor != NULL) Substring_free(&donor);
@@ -1500,6 +2104,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
} else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
+ best_donor1_prob,/*shortexonA_prob*/best_acceptor1_prob,
+ /*shortexonD_prob*/best_donor2_prob,best_acceptor2_prob,
/*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
@@ -1507,7 +2113,7 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,sarrayp));
+ splicing_penalty,querylength,first_read_p,sensedir,sarrayp));
} else {
if (donor != NULL) Substring_free(&donor);
if (shortexon != NULL) Substring_free(&shortexon);
@@ -1517,13 +2123,12 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
} else {
/* Originally from minus strand. Complement. */
- sensep = (plusp == true) ? false : true;
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
donor = Substring_new_donor(best_donor2_splicecoord,best_donor2_knowni,
best_splice_pos_2,best_segmentj_nmismatches,
best_donor2_prob,/*left*/segmentj_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmentj_chrnum,segmentj_chroffset,segmentj_chrhigh,segmentj_chrlength);
shortexon = Substring_new_shortexon(best_acceptor2_splicecoord,best_acceptor2_knowni,
@@ -1531,13 +2136,13 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
/*acceptor_pos*/best_splice_pos_2,/*donor_pos*/best_splice_pos_1,best_segmentm_nmismatches,
/*acceptor_prob*/best_acceptor2_prob,/*donor_prob*/best_donor1_prob,
/*left*/segmentm_left,query_compress,querylength,
- plusp,genestrand,first_read_p,sensep,/*acceptor_ambp*/false,/*donor_ambp*/false,
+ plusp,genestrand,first_read_p,sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
segmentm_chrnum,segmentm_chroffset,segmentm_chrhigh,segmentm_chrlength);
acceptor = Substring_new_acceptor(best_acceptor1_splicecoord,best_acceptor1_knowni,
best_splice_pos_1,best_segmenti_nmismatches,
best_acceptor1_prob,/*left*/segmenti_left,query_compress,
- querylength,plusp,genestrand,first_read_p,sensep,
+ querylength,plusp,genestrand,first_read_p,sensedir,
segmenti_chrnum,segmenti_chroffset,segmenti_chrhigh,segmenti_chrlength);
if (donor == NULL || shortexon == NULL || acceptor == NULL) {
@@ -1557,6 +2162,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
if (sufficient1p && sufficient2p && sufficient3p && sufficient4p) {
*nhits += 1;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
+ best_donor2_prob,/*shortexonA_prob*/best_acceptor2_prob,
+ /*shortexonD_prob*/best_donor1_prob,best_acceptor1_prob,
/*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
@@ -1564,7 +2171,7 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,sarrayp));
+ splicing_penalty,querylength,first_read_p,sensedir,sarrayp));
} else if (subs_or_indels_p == true) {
/* Don't alter hits */
if (donor != NULL) Substring_free(&donor);
@@ -1577,6 +2184,8 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
} else if ((sufficient1p || sufficient2p) && (sufficient3p || sufficient4p)) {
*lowprob = List_push(*lowprob,
(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
+ best_donor2_prob,/*shortexonA_prob*/best_acceptor2_prob,
+ /*shortexonD_prob*/best_donor1_prob,best_acceptor1_prob,
/*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
@@ -1584,7 +2193,7 @@ Splice_solve_double (int *found_score, int *nhits, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,sarrayp));
+ splicing_penalty,querylength,first_read_p,sensedir,sarrayp));
} else {
if (donor != NULL) Substring_free(&donor);
if (shortexon != NULL) Substring_free(&shortexon);
@@ -1643,7 +2252,7 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
Univcoord_T segmenti_left;
Substring_T donor, acceptor;
int best_nmismatches, nmismatches, nmismatches_donor, nmismatches_acceptor;
- double best_prob, prob;
+ double best_prob, prob, donor_prob, acceptor_prob;
List_T accepted_hits, rejected_hits, donor_hits, acceptor_hits, p;
int sensedir;
@@ -1801,11 +2410,12 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(donor);
+ donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
+ prob = best_prob - donor_prob;
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
/*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
/*ambcoords_donor*/NULL,ambcoords,
@@ -1869,11 +2479,12 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
+ acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
+ prob = best_prob - acceptor_prob;
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
/*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
ambcoords,/*ambcoords_acceptor*/NULL,
@@ -1978,7 +2589,7 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
Univcoord_T segmentj_left;
Substring_T donor, acceptor;
int best_nmismatches, nmismatches, nmismatches_donor, nmismatches_acceptor;
- double best_prob, prob;
+ double best_prob, prob, donor_prob, acceptor_prob;
List_T accepted_hits, rejected_hits, donor_hits, acceptor_hits, p;
int donor_length, acceptor_length;
bool plusp;
@@ -2139,11 +2750,12 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
}
nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(acceptor);
+ donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
+ prob = best_prob - donor_prob;
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
/*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
/*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
/*ambcoords_donor*/NULL,ambcoords,
@@ -2207,11 +2819,12 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
}
nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
+ acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
+ prob = best_prob - acceptor_prob;
*ambiguous = List_push(*ambiguous,
(void *) Stage3end_new_splice(&(*found_score),
nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,
/*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
ambcoords,/*ambcoords_acceptor*/NULL,
diff --git a/src/splice.h b/src/splice.h
index 1baebb9..3f017e5 100644
--- a/src/splice.h
+++ b/src/splice.h
@@ -1,4 +1,4 @@
-/* $Id: splice.h 140368 2014-07-02 00:56:33Z twu $ */
+/* $Id: splice.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef SPLICE_INCLUDED
#define SPLICE_INCLUDED
#include "bool.h"
@@ -10,6 +10,42 @@
extern void
Splice_setup (int min_shortend_in);
+extern int
+Splice_resolve_sense (int *best_knowni_i, int *best_knowni_j,
+ int *best_nmismatches_i, int *best_nmismatches_j,
+ double *best_prob_i, double *best_prob_j,
+
+ Univcoord_T segmenti_left, Univcoord_T segmentj_left,
+ Univcoord_T segmenti_chroffset, Univcoord_T segmentj_chroffset,
+
+ int querystart, int queryend, int querylength, Compress_T query_compress,
+ int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
+ int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
+ int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
+ int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
+ int segmenti_donor_nknown, int segmentj_acceptor_nknown,
+ int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
+ int splicing_penalty, int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p);
+
+extern int
+Splice_resolve_antisense (int *best_knowni_i, int *best_knowni_j,
+ int *best_nmismatches_i, int *best_nmismatches_j,
+ double *best_prob_i, double *best_prob_j,
+
+ Univcoord_T segmenti_left, Univcoord_T segmentj_left,
+ Univcoord_T segmenti_chroffset, Univcoord_T segmentj_chroffset,
+
+ int querystart, int queryend, int querylength, Compress_T query_compress,
+ int *segmenti_donor_knownpos, int *segmentj_acceptor_knownpos,
+ int *segmentj_antidonor_knownpos, int *segmenti_antiacceptor_knownpos,
+ int *segmenti_donor_knowni, int *segmentj_acceptor_knowni,
+ int *segmentj_antidonor_knowni, int *segmenti_antiacceptor_knowni,
+ int segmenti_donor_nknown, int segmentj_acceptor_nknown,
+ int segmentj_antidonor_nknown, int segmenti_antiacceptor_nknown,
+ int splicing_penalty, int max_mismatches_allowed,
+ bool plusp, int genestrand, bool first_read_p);
+
extern List_T
Splice_solve_single_sense (int *found_score, int *nhits, List_T hits, List_T *lowprob,
diff --git a/src/stage1.c b/src/stage1.c
index c2b3d24..2ad2cdb 100644
--- a/src/stage1.c
+++ b/src/stage1.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1.c 158357 2015-02-10 19:10:16Z twu $";
+static char rcsid[] = "$Id: stage1.c 158350 2015-02-10 18:43:34Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/stage1.h b/src/stage1.h
index 096dd2c..96f0a2c 100644
--- a/src/stage1.h
+++ b/src/stage1.h
@@ -1,6 +1,7 @@
-/* $Id: stage1.h 128855 2014-02-28 21:50:24Z twu $ */
+/* $Id: stage1.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef STAGE1_INCLUDED
#define STAGE1_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "indexdb.h"
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 89b847e..a6f5fa4 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 157977 2015-02-03 18:46:53Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 167163 2015-06-09 20:54:02Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -50,6 +50,7 @@ static char rcsid[] = "$Id: stage1hr.c 157977 2015-02-03 18:46:53Z twu $";
#include "stage2.h"
#include "stage3.h"
+#include "comp.h"
#ifdef WORDS_BIGENDIAN
@@ -65,11 +66,14 @@ static char rcsid[] = "$Id: stage1hr.c 157977 2015-02-03 18:46:53Z twu $";
#endif
+#define SPEED 1
+
/* Note: Heapsort still turns out to be a bit faster than a global
qsort, because it takes advantage of the fact that the positions
within each batch are already sorted. Also, heapsort can handle
8-byte positions. */
+#define LONG_ENDSPLICES 1 /* Necessary to get outside splices correctly */
#define NO_EXTENSIONS_BEFORE_ZERO 1
@@ -91,6 +95,7 @@ static char rcsid[] = "$Id: stage1hr.c 157977 2015-02-03 18:46:53Z twu $";
#define MAX_NALIGNMENTS 3
#define MAX_NTERMINALS 100
+#define MAX_ALLOCATION 200
static bool use_sarray_p = true;
static bool use_only_sarray_p = true;
@@ -119,6 +124,7 @@ static int reject_trimlength;
static bool novelsplicingp;
static bool knownsplicingp;
+static bool find_dna_chimeras_p;
static bool distances_observed_p;
static Chrpos_T min_intronlength;
@@ -134,10 +140,10 @@ static int min_distantsplicing_identity;
/* GMAP parameters */
-static bool gmap_pairsearch_p;
+static bool gmap_segments_p; /* previously called gmap_terminal_p. Should move earlier (1). */
+static bool gmap_pairsearch_p; /* controls halfmapping. Should move later (2). */
+static bool gmap_improvement_p; /* Should be at end (3). */
static bool gmap_indel_knownsplice_p;
-static bool gmap_terminal_p;
-static bool gmap_improvement_p;
static bool gmap_rerun_p = true;
static int antistranded_penalty;
@@ -150,18 +156,10 @@ static int extramaterial_paired;
static int trigger_score_for_gmap;
static int gmap_allowance;
static int max_gmap_pairsearch;
-static int max_gmap_terminal;
+static int max_gmap_segments; /* Not used */
static int max_gmap_improvement;
-static int sufflookback = 60;
-static int nsufflookback = 5;
-static int extraband_single = 3;
-static int extraband_end = 3; /* Shouldn't differ from 0, since onesidegapp is true? */
-static int extraband_paired = 7;
static int minendexon = 9;
-static int ngap = 3; /* 0? */
-
-
#define A_CHAR 0x0
@@ -199,6 +197,7 @@ static Univcoord_T *chroffsets;
static Univcoord_T *chrhighs;
static Chrpos_T *chrlengths; /* May differ from chrhigh - chroffset in circular chromosomes */
static int nchromosomes;
+static Genome_T genome;
static int leftreadshift;
static unsigned int oligobase_mask; /* same as kmer_mask */
@@ -216,7 +215,12 @@ static int end_miss_two; /* Used for computing max_terminal_length */
#define MAX_LOCALSPLICING_POTENTIAL 1000
+#if 0
+/* Creates issues with ambiguous substrings */
#define LOCALSPLICING_NMATCHES_SLOP 1
+#else
+#define LOCALSPLICING_NMATCHES_SLOP 0
+#endif
#define LOCALSPLICING_PROB_SLOP 0.05
@@ -319,7 +323,7 @@ static int end_miss_two; /* Used for computing max_terminal_length */
#define debug4h(x)
#endif
-/* Determining spliceable segments */
+/* Pairing up segments */
#ifdef DEBUG5
#define debug5(x) x
#else
@@ -422,6 +426,8 @@ struct Segment_T {
int querypos5;
int querypos3;
+ Univcoord_T lowpos; /* Needed for dynamic programming in converting segment to GMAP */
+ Univcoord_T highpos; /* Needed for dynamic programming in converting segment to GMAP */
int floor;
int floor_xfirst;
@@ -436,6 +442,7 @@ struct Segment_T {
bool right_splice_p; /* Set by find_singlesplices, used by find_doublesplices for speed */
bool usedp;
+ bool pairablep;
#if 0
int leftspan; /* For segmentm of double splice */
@@ -444,6 +451,97 @@ struct Segment_T {
};
+static int
+Segment_length_cmp (const void *a, const void *b) {
+ Segment_T x = * (Segment_T *) a;
+ Segment_T y = * (Segment_T *) b;
+
+ int xlength, ylength;
+
+ xlength = x->querypos3 - x->querypos5;
+ ylength = y->querypos3 - y->querypos5;
+
+ if (xlength > ylength) {
+ return -1;
+ } else if (ylength > xlength) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+static int
+Segment_diagonal_cmp (const void *a, const void *b) {
+ Segment_T x = * (Segment_T *) a;
+ Segment_T y = * (Segment_T *) b;
+
+ if (x->diagonal < y->diagonal) {
+ return -1;
+ } else if (y->diagonal < x->diagonal) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+static int
+Segment_querypos5_ascending_cmp (const void *a, const void *b) {
+ Segment_T x = * (Segment_T *) a;
+ Segment_T y = * (Segment_T *) b;
+
+ if (x->querypos5 < y->querypos5) {
+ return -1;
+ } else if (y->querypos5 < x->querypos5) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+static int
+Segment_querypos3_ascending_cmp (const void *a, const void *b) {
+ Segment_T x = * (Segment_T *) a;
+ Segment_T y = * (Segment_T *) b;
+
+ if (x->querypos3 < y->querypos3) {
+ return -1;
+ } else if (y->querypos3 < x->querypos3) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+static int
+Segment_querypos5_descending_cmp (const void *a, const void *b) {
+ Segment_T x = * (Segment_T *) a;
+ Segment_T y = * (Segment_T *) b;
+
+ if (x->querypos5 > y->querypos5) {
+ return -1;
+ } else if (y->querypos5 > x->querypos5) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+static int
+Segment_querypos3_descending_cmp (const void *a, const void *b) {
+ Segment_T x = * (Segment_T *) a;
+ Segment_T y = * (Segment_T *) b;
+
+ if (x->querypos3 > y->querypos3) {
+ return -1;
+ } else if (y->querypos3 > x->querypos3) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+
+
struct Floors_T {
int *allocated0;
int *prev_omitted;
@@ -680,6 +778,8 @@ struct T {
int plus_spanningset_nelts[MAX_INDEX1INTERVAL];
int minus_spanningset_nelts[MAX_INDEX1INTERVAL];
+ bool read_oligos_p;
+
#ifdef LARGE_GENOMES
unsigned char **plus_positions_high_allocated;
unsigned char **plus_positions_high; /* points to above[index1interval-1] */
@@ -1163,6 +1263,7 @@ read_oligos (bool *allvalidp, T this, char *queryuc_ptr, int querylength,
Reader_free(&reader);
+ this->read_oligos_p = true;
return noligos;
}
@@ -1380,6 +1481,8 @@ Stage1_new (int querylength) {
new->minus_spanningset_nelts[mod] = 0;
}
+ new->read_oligos_p = false;
+
#ifdef LARGE_GENOMES
new->plus_positions_high_allocated = (unsigned char **) MALLOC((querylength+overhang) * sizeof(unsigned char *));
new->plus_positions_high = &(new->plus_positions_high_allocated[overhang]);
@@ -3485,18 +3588,19 @@ trim_ends_unknowns_only (int *trim5, int *trim3, char *sequence1, char *sequence
/* Returns a master pointer (segments) to the block of segments */
/* If end_indel_mismatches_allowed set to 0, won't save any segments for end indels. */
static List_T
-find_complete_mm (int *found_score, int *nhits, List_T hits, struct Segment_T *segments, int nsegments,
+find_complete_mm (int *found_score, int *nhits, List_T hits, List_T anchor_segments,
int querylength, Compress_T query_compress,
int max_mismatches_allowed, bool plusp, int genestrand, bool first_read_p) {
Stage3end_T hit;
int nmismatches;
Univcoord_T left;
Segment_T segmenti;
+ List_T p;
- for (segmenti = segments; segmenti < &(segments[nsegments]); segmenti++) {
- if (segmenti->diagonal == (Univcoord_T) -1) {
- /* Skip chr marker segment */
- } else if (segmenti->floor <= max_mismatches_allowed) {
+ for (p = anchor_segments; p != NULL; p = List_next(p)) {
+ segmenti = (Segment_T) List_head(p);
+ assert(segmenti->diagonal != (Univcoord_T) -1);
+ if (segmenti->floor <= max_mismatches_allowed) {
left = segmenti->diagonal - querylength;
nmismatches = Genome_count_mismatches_limit(query_compress,left,/*pos5*/0,/*pos3*/querylength,
max_mismatches_allowed,plusp,genestrand,first_read_p);
@@ -3518,25 +3622,31 @@ find_complete_mm (int *found_score, int *nhits, List_T hits, struct Segment_T *s
}
+/* TODO: Change spliceable to be an attribute of the segment. Then we
+ can loop over anchor_segments only */
static struct Segment_T *
-identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
+identify_all_segments (int *nsegments, List_T *anchor_segments, Segment_T **spliceable, int *nspliceable,
#ifdef LARGE_GENOMES
unsigned char **positions_high, UINT4 **positions_low,
#else
Univcoord_T **positions,
#endif
int *npositions, bool *omitted, int querylength, int query_lastpos, Floors_T floors,
- bool plusp) {
+ int max_mismatches_allowed, bool plusp) {
struct Segment_T *segments = NULL;
+ Segment_T *array;
+ int length_threshold;
+ int nanchors, n;
+
Batch_T batch, sentinel;
struct Batch_T sentinel_struct, *batchpool;
Batch_T *heap;
int heapsize = 0;
int parenti, smallesti, righti, i;
int querypos, first_querypos, last_querypos;
- int floor, floor_xfirst, floor_xlast;
int floor_left, floor_right, floor_incr;
- int *floors_from_neg3, *floors_from_xfirst, *floors_to_xlast, *floors_to_pos3;
+ int floor, floor_xfirst, floor_xlast, *floors_from_xfirst, *floors_to_xlast;
+ int *floors_from_neg3, *floors_to_pos3;
/* int exclude_xfirst, exclude_xlast; */
Univcoord_T diagonal, segment_left, last_diagonal, chroffset = 0U, chrhigh = 0U;
Chrpos_T chrlength, max_distance;
@@ -3551,9 +3661,14 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
int total_npositions = 0;
int joffset = 0, j;
+#ifdef DEBUG
+ List_T p;
+ Segment_T segment;
+#endif
+
Segment_T ptr, ptr_chrstart;
Segment_T *ptr_spliceable;
- bool next_spliceable_p;
+ /* bool next_spliceable_p; */
#ifdef DEBUG19
Segment_T ptr0;
int k;
@@ -3567,7 +3682,15 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
Univcoord_T *splicesites_local, splicesites_static[1];
int nsplicesites_local;
- debug(printf("*** Starting identify_all_segments ***\n"));
+ debug(printf("*** Starting identify_all_segments on %s ***\n",plusp ? "plus" : "minus"));
+
+ if (floors == NULL) {
+ *nsegments = 0;
+ *anchor_segments = (List_T) NULL;
+ *spliceable = (Segment_T *) NULL;
+ *nspliceable = 0;
+ return (struct Segment_T *) NULL;
+ }
if (splicesites == NULL) {
splicesites_local = splicesites_static;
@@ -3585,7 +3708,7 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
/* Create sentinel */
#ifdef DIAGONAL_ADD_QUERYPOS
- sentinel_struct.diagonal_add_querypos = (Univcoord_T) -1; /* infinity */
+ sentinel_struct.diagonal_add_querypos = (UINT8) -1; /* infinity */
sentinel_struct.diagonal_add_querypos <<= 32;
#else
sentinel_struct.querypos = querylength; /* essentially infinity */
@@ -3687,6 +3810,7 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
floors_from_xfirst = floors->scorefrom[/* xfirst_from = */ firstbound-index1interval+max_end_insertions];
floors_to_xlast = floors->scoreto[/* xlast_to = */ lastbound+1+index1interval-index1part-max_end_insertions];
#else
+ /* This was previously run in identify_all_segments and not in identify_all_segments_for_terminals */
if (spansize /* +max_end_insertions */ > query_lastpos + index1interval) {
floors_from_xfirst = floors->scorefrom[query_lastpos+index1interval];
} else {
@@ -3832,6 +3956,7 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos,
floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
} else {
/* End of diagonal */
floor_incr = floors_to_pos3[last_querypos] /* floors->score[last_querypos][query_lastpos+index1interval] */;
@@ -3950,6 +4075,16 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
ptr->chrlength = chrlength;
ptr->querypos5 = first_querypos;
ptr->querypos3 = last_querypos;
+
+ /* FORMULA */
+ if (plusp) {
+ ptr->lowpos = ptr->diagonal - querylength + ptr->querypos5;
+ ptr->highpos = ptr->diagonal - querylength + ptr->querypos3 + index1part;
+ } else {
+ ptr->lowpos = ptr->diagonal - ptr->querypos3 - index1part - index1part;
+ ptr->highpos = ptr->diagonal - ptr->querypos5 - index1part;
+ }
+
ptr->floor = floor;
ptr->floor_xfirst = floor_xfirst;
ptr->floor_xlast = floor_xlast;
@@ -3957,10 +4092,11 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
ptr->floor_right = floor_right;
ptr->leftmost = ptr->rightmost = -1;
ptr->left_splice_p = ptr->right_splice_p = false;
- ptr->usedp = false;
#if 0
ptr->leftspan = ptr->rightspan = -1;
#endif
+ ptr->usedp = false;
+ ptr->pairablep = false;
#if 0
/* Not doing this, because the max_distance test is already good enough */
@@ -3984,11 +4120,18 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
#endif
if (diagonal <= last_diagonal + max_distance) {
*ptr_spliceable++ = ptr;
+ debug4s(printf("%s diagonal %u is spliceable because next one is at %u\n",
+ plusp ? "plus" : "minus",last_diagonal,diagonal));
} else {
- debug4s(printf("plus diagonal %u is not spliceable because next one is at %u\n",
- last_diagonal,diagonal));
+ debug4s(printf("%s diagonal %u is not spliceable because next one is at %u\n",
+ plusp ? "plus" : "minus",last_diagonal,diagonal));
}
- debug14(printf("Saving segment at %u\n",last_diagonal));
+ debug14(printf("Saving segment at %u, query %d..%d",last_diagonal,ptr->querypos5,ptr->querypos3));
+ if (last_querypos >= first_querypos + /*min_segment_length*/1) {
+ *anchor_segments = List_push(*anchor_segments,(void *) ptr);
+ debug14(printf(" ANCHOR"));
+ }
+ debug14(printf("\n"));
ptr++;
}
@@ -4023,6 +4166,7 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
debug1(printf("start of diagonal %llu, first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
(unsigned long long) diagonal,first_querypos,
floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
}
last_querypos = querypos;
@@ -4209,6 +4353,16 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
ptr->chrlength = chrlength;
ptr->querypos5 = first_querypos;
ptr->querypos3 = last_querypos;
+
+ /* FORMULA */
+ if (plusp) {
+ ptr->lowpos = ptr->diagonal - querylength + ptr->querypos5;
+ ptr->highpos = ptr->diagonal - querylength + ptr->querypos3 + index1part;
+ } else {
+ ptr->lowpos = ptr->diagonal - ptr->querypos3 - index1part - index1part;
+ ptr->highpos = ptr->diagonal - ptr->querypos5 - index1part;
+ }
+
ptr->floor = floor;
ptr->floor_xfirst = floor_xfirst;
ptr->floor_xlast = floor_xlast;
@@ -4216,12 +4370,19 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
ptr->floor_right = floor_right;
ptr->leftmost = ptr->rightmost = -1;
ptr->left_splice_p = ptr->right_splice_p = false;
- ptr->usedp = false;
#if 0
ptr->leftspan = ptr->rightspan = -1;
#endif
+ ptr->usedp = false;
+ ptr->pairablep = false;
+
/* Last segment is not spliceable */
- debug14(printf("Saving segment at %u\n",last_diagonal));
+ debug14(printf("Saving segment at %u, query %d..%d",last_diagonal,ptr->querypos5,ptr->querypos3));
+ if (last_querypos >= first_querypos + /*min_segment_length*/1) {
+ debug14(printf(" ANCHOR"));
+ *anchor_segments = List_push(*anchor_segments,(void *) ptr);
+ }
+ debug14(printf("\n"));
ptr++;
}
@@ -4255,612 +4416,242 @@ identify_all_segments (int *nsegments, Segment_T **spliceable, int *nspliceable,
assert(*nsegments <= total_npositions + nchromosomes);
+ *anchor_segments = List_reverse(*anchor_segments);
+#ifdef DEBUG
+ printf("%d anchor segments\n",List_length(*anchor_segments));
+ for (p = *anchor_segments; p != NULL; p = List_next(p)) {
+ segment = (Segment_T) List_head(p);
+ printf("%u %d..%d\n",segment->diagonal,segment->querypos5,segment->querypos3);
+ }
+#endif
+
+ if (List_length(*anchor_segments) > 10) {
+ array = (Segment_T *) List_to_array_n(&nanchors,*anchor_segments);
+ qsort(array,nanchors,sizeof(Segment_T),Segment_length_cmp);
+ List_free(&(*anchor_segments));
+ *anchor_segments = (List_T) NULL;
+
+ length_threshold = array[10]->querypos3 - array[10]->querypos5;
+ n = 10;
+ while (n < nanchors && array[n]->querypos3 - array[n]->querypos5 == length_threshold) {
+ n++;
+ }
+ if (n <= 20) {
+ qsort(array,n,sizeof(Segment_T),Segment_diagonal_cmp);
+ for (i = n-1; i >= 0; i--) {
+ *anchor_segments = List_push(*anchor_segments,(void *) array[i]);
+ }
+ }
+ FREE(array);
+ }
+
+#ifdef DEBUG
+ printf("%d selected anchor segments\n",List_length(*anchor_segments));
+ for (p = *anchor_segments; p != NULL; p = List_next(p)) {
+ segment = (Segment_T) List_head(p);
+ printf("%u %d..%d\n",segment->diagonal,segment->querypos5,segment->querypos3);
+ }
+#endif
+
return segments;
}
-/* Specialized version of identify_all_segments that stores only floor_left and floor_right */
-static struct Segment_T *
-identify_all_segments_for_terminals (int *nsegments,
-#ifdef LARGE_GENOMES
- unsigned char **positions_high, UINT4 **positions_low,
-#else
- Univcoord_T **positions,
-#endif
- int *npositions, bool *omitted, int querylength, int query_lastpos,
- Floors_T floors, int max_mismatches_allowed, bool plusp) {
- struct Segment_T *segments = NULL;
- Batch_T batch, sentinel;
- struct Batch_T sentinel_struct, *batchpool;
- Batch_T *heap;
- int heapsize = 0;
- int parenti, smallesti, righti, i;
- int querypos, first_querypos, last_querypos;
- int floor_left, floor_right, floor_incr;
- int *floors_from_neg3, *floors_to_pos3;
- /* int exclude_xfirst, exclude_xlast; */
- Univcoord_T diagonal, last_diagonal, chroffset = 0U, chrhigh = 0U;
- Chrpos_T chrlength;
- Chrnum_T chrnum = 1;
-#ifdef OLD_FLOOR_ENDS
- int halfquerylength, halfquery_lastpos;
-#endif
-#ifdef DIAGONAL_ADD_QUERYPOS
- UINT8 diagonal_add_querypos;
-#endif
- int total_npositions = 0;
- Segment_T ptr, ptr_chrstart;
-#ifndef SLOW_CHR_UPDATE
- Univcoord_T goal;
- int j, nchromosomes_local = nchromosomes;
- Univcoord_T *chrhighs_local = chrhighs;
-#endif
+#if 0
+/* Modified from pair_up_concordant_aux in stage3hr.c */
+static void
+pair_up_segments (struct Segment_T *plus_segments_5, int plus_nsegments_5,
+ struct Segment_T *minus_segments_5, int minus_nsegments_5,
+ struct Segment_T *plus_segments_3, int plus_nsegments_3,
+ struct Segment_T *minus_segments_3, int minus_nsegments_3,
+ int querylength5, int querylength3, Chrpos_T pairmax) {
+ int i, j;
+ Univcoord_T insert_start;
+ Segment_T segment5, segment3; /* Need pointers, because we are changing the pairable value */
- debug(printf("*** Starting identify_all_segments ***\n"));
+ debug(printf("Entered pair_up_segments\n"));
-#ifdef OLD_FLOOR_ENDS
- halfquerylength = querylength / 2;
- halfquery_lastpos = halfquerylength - index1part;
+ /* plus/plus */
+ j = 0;
+ for (i = 0; i < plus_nsegments_5; i++) {
+ segment5 = &(plus_segments_5[i]);
+ if ((insert_start = segment5->diagonal) == (Univcoord_T) -1) {
+ /* Skip chromosomal end marker */
+ } else {
+#ifdef DEBUG5
+ printf("plus/plus: i=%d/%d %u %d..%d\n",
+ i,plus_nsegments_5,segment5->diagonal,segment5->querypos5,segment5->querypos3);
+ if (j >= plus_nsegments_3) {
+ printf(" current: j=%d/%d\n",j,plus_nsegments_3);
+ } else if (plus_segments_3[j].diagonal == (Univcoord_T) -1) {
+ printf(" current: j=%d/%d %u\n",j,plus_nsegments_3,plus_segments_3[j].diagonal);
+ } else {
+ printf(" current: j=%d/%d %u %d..%d\n",
+ j,plus_nsegments_3,plus_segments_3[j].diagonal,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3);
+ }
#endif
- /* Create sentinel */
-#ifdef DIAGONAL_ADD_QUERYPOS
- sentinel_struct.diagonal_add_querypos = (UINT8) -1; /* infinity */
- sentinel_struct.diagonal_add_querypos <<= 32;
-#else
- sentinel_struct.querypos = querylength; /* essentially infinity */
- sentinel_struct.diagonal = (Univcoord_T) -1; /* infinity */
+ /* Get to correct chrnum */
+ while (j < plus_nsegments_3 && (plus_segments_3[j].diagonal == (Univcoord_T) -1 || plus_segments_3[j].diagonal < segment5->diagonal)) {
+#ifdef DEBUG5
+ if (plus_segments_3[j].diagonal == (Univcoord_T) -1) {
+ printf(" advancing: j=%d/%d %u\n",j,plus_nsegments_3,plus_segments_3[j].diagonal);
+ } else {
+ printf(" advancing: j=%d/%d %u %d..%d\n",
+ j,plus_nsegments_3,plus_segments_3[j].diagonal,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3);
+ }
#endif
- sentinel = &sentinel_struct;
-
- /* Set up batches */
- batchpool = (struct Batch_T *) MALLOCA((query_lastpos+1) * sizeof(struct Batch_T));
- heap = (Batch_T *) MALLOCA((2*(query_lastpos+1)+1+1) * sizeof(Batch_T));
+ j++;
+ }
- /* Don't add entries for compoundpos positions (skip querypos -2, -1, lastpos+1, lastpos+2) */
- if (plusp) {
- for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
- if (omitted[querypos] == true) {
- debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
- querypos,npositions[querypos],omitted[querypos]));
- } else if (npositions[querypos] > 0) {
- debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
- querypos,npositions[querypos],omitted[querypos]));
- batch = &(batchpool[i]);
-#ifdef LARGE_GENOMES
- Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions_high[querypos],positions_low[querypos],
- npositions[querypos],querylength);
-#else
- Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions[querypos],npositions[querypos],querylength);
-#endif
- total_npositions += npositions[querypos];
- if (batch->npositions > 0) {
- min_heap_insert(heap,&heapsize,batch);
- i++;
+ if (j < plus_nsegments_3) {
+ while (j >= 0 && plus_segments_3[j].diagonal != (Univcoord_T) -1 && plus_segments_3[j].diagonal > segment5->diagonal) {
+ debug5(printf(" backup: j=%d/%d %u %d..%d\n",
+ j,plus_nsegments_3,plus_segments_3[j].diagonal,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3));
+ j--;
+ }
+ j++; /* Finish backup */
+
+ /* Cannot perform arithmetic on diagonal, because we want to preserve -1 as being the largest value */
+ /* Ignore inclusion of querylength inside pairmax */
+ while (j < plus_nsegments_3 && plus_segments_3[j].diagonal <= insert_start + pairmax /*- querylength3*/) {
+ debug5(printf(" overlap: j=%d/%d, %u <= %u + %u, %d..%d\n",
+ j,plus_nsegments_3,plus_segments_3[j].diagonal,
+ insert_start,pairmax,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3));
+ debug5(printf("Setting plus segments %d and %d to be pairable: %u and %u\n",i,j,segment5->diagonal,plus_segments_3[j].diagonal));
+ segment5->pairablep = true;
+ plus_segments_3[j].pairablep = true;
+ j++;
}
- } else {
- debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
- querypos,npositions[querypos],omitted[querypos]));
}
}
- } else {
- for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
- if (omitted[querypos] == true) {
- debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
- querypos,npositions[querypos],omitted[querypos]));
- } else if (npositions[querypos] > 0) {
- debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
- querypos,npositions[querypos],omitted[querypos]));
- batch = &(batchpool[i]);
-#ifdef LARGE_GENOMES
- Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions_high[querypos],positions_low[querypos],
- npositions[querypos],querylength);
-#else
- Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions[querypos],npositions[querypos],querylength);
+ }
+
+ /* minus/minus */
+ j = 0;
+ for (i = 0; i < minus_nsegments_3; i++) {
+ segment3 = &(minus_segments_3[i]);
+ if ((insert_start = segment3->diagonal) == (Univcoord_T) -1) {
+ /* Skip chromosomal end marker */
+ } else {
+#ifdef DEBUG5
+ printf("minus/minus: i=%d/%d %u %d..%d\n",
+ i,minus_nsegments_3,segment3->diagonal,segment3->querypos5,segment3->querypos3);
+ if (j >= minus_nsegments_5) {
+ printf(" current: j=%d/%d\n",j,minus_nsegments_5);
+ } else if (minus_segments_5[j].diagonal == (Univcoord_T) -1) {
+ printf(" current: j=%d/%d %u\n",j,minus_nsegments_5,minus_segments_5[j].diagonal);
+ } else {
+ printf(" current: j=%d/%d %u %d..%d\n",
+ j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3);
+ }
#endif
- total_npositions += npositions[querypos];
- if (batch->npositions > 0) {
- min_heap_insert(heap,&heapsize,batch);
- i++;
+
+ /* Get to correct chrnum */
+ while (j < minus_nsegments_5 && (minus_segments_5[j].diagonal == (Univcoord_T) -1 || minus_segments_5[j].diagonal < segment3->diagonal)) {
+#ifdef DEBUG5
+ if (minus_segments_5[j].diagonal == (Univcoord_T) -1) {
+ printf(" advancing: j=%d/%d %u\n",j,minus_nsegments_5,minus_segments_5[j].diagonal);
+ } else {
+ printf(" advancing: j=%d/%d %u %d..%d\n",
+ j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3);
+ }
+#endif
+ j++;
+ }
+
+ if (j < minus_nsegments_5) {
+ while (j >= 0 && minus_segments_5[j].diagonal != (Univcoord_T) -1 && minus_segments_5[j].diagonal > segment3->diagonal) {
+ debug5(printf(" backup: j=%d/%d %u %d..%d\n",
+ j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3));
+ j--;
+ }
+ j++; /* Finish backup */
+
+ /* Cannot perform arithmetic on diagonal, because we want to preserve -1 as being the largest value */
+ /* Ignore inclusion of querylength inside pairmax */
+ while (j < minus_nsegments_5 && minus_segments_5[j].diagonal <= insert_start + pairmax /*- querylength5*/) {
+ debug5(printf(" overlap: j=%d/%d %u %d..%d\n",
+ j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3));
+ debug5(printf("Setting minus segments %d and %d to be pairable: %u and %u\n",i,j,segment3->diagonal,minus_segments_5[j].diagonal));
+ segment3->pairablep = true;
+ minus_segments_5[j].pairablep = true;
+ j++;
}
- } else {
- debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
- querypos,npositions[querypos],omitted[querypos]));
}
}
}
+ return;
+}
+#endif
- if (i == 0) {
- FREEA(heap);
- FREEA(batchpool);
- *nsegments = 0;
- return (struct Segment_T *) NULL;
- }
- /* Set up rest of heap */
- for (i = heapsize+1; i <= 2*heapsize+1; i++) {
- heap[i] = sentinel;
- }
+static void
+pair_up_anchor_segments (List_T plus_anchor_segments_5, List_T minus_anchor_segments_5,
+ List_T plus_anchor_segments_3, List_T minus_anchor_segments_3,
+ Chrpos_T pairmax) {
+ Univcoord_T insert_start;
+ Segment_T segment5, segment3;
+ List_T q, pstart, pend, p;
- /* Putting chr marker "segments" after each chromosome */
- segments = (struct Segment_T *) CALLOC(total_npositions + nchromosomes,sizeof(struct Segment_T));
- ptr_chrstart = ptr = &(segments[0]);
+ debug(printf("Entering pair_up_anchor_segments\n"));
- /*
- if ((exclude_xfirst = firstbound-2-index1part-max_end_insertions) < index1interval) {
- exclude_xfirst = index1interval;
- }
- if ((exclude_xlast = lastbound+1+max_end_insertions) > query_lastpos-index1interval) {
- exclude_xlast = query_lastpos-index1interval;
+ /* plus/plus */
+ pstart = plus_anchor_segments_3;
+ for (q = plus_anchor_segments_5; q != NULL && pstart != NULL; q = List_next(q)) {
+ segment5 = (Segment_T) List_head(q);
+ assert(segment5->diagonal != (Univcoord_T) -1);
+ insert_start = segment5->diagonal;
+
+ while (pstart != NULL && ((Segment_T) pstart->first)->diagonal < segment5->diagonal) {
+ pstart = List_next(pstart);
+ }
+
+ pend = pstart;
+ while (pend != NULL && ((Segment_T) pend->first)->diagonal < segment5->diagonal + pairmax) {
+ pend = List_next(pend);
+ }
+
+ for (p = pstart; p != pend; p = List_next(p)) {
+ segment3 = (Segment_T) List_head(p);
+ assert(segment3->diagonal - segment5->diagonal < pairmax);
+ debug5(printf("Setting plus segments to be pairable: %u and %u (distance %u)\n",
+ segment5->diagonal,segment3->diagonal,segment3->diagonal - segment5->diagonal));
+ segment5->pairablep = true;
+ segment3->pairablep = true;
+ }
}
- */
+
+ /* minus/minus */
+ pstart = minus_anchor_segments_5;
+ for (q = minus_anchor_segments_3; q != NULL && pstart != NULL; q = List_next(q)) {
+ segment3 = (Segment_T) List_head(q);
+ assert(segment3->diagonal != (Univcoord_T) -1);
+ insert_start = segment3->diagonal;
-#if 0
- /* Should account for firstbound and lastbound */
- floors_from_xfirst = floors->scorefrom[/* xfirst_from = */ firstbound-index1interval+max_end_insertions];
- floors_to_xlast = floors->scoreto[/* xlast_to = */ lastbound+1+index1interval-index1part-max_end_insertions];
-#elif 0
- if (spansize /* +max_end_insertions */ > query_lastpos + index1interval) {
- floors_from_xfirst = floors->scorefrom[query_lastpos+index1interval];
- } else {
- floors_from_xfirst = floors->scorefrom[spansize /* +max_end_insertions */];
- }
- if (query_lastpos-spansize /* -max_end_insertions */ < -index1interval) {
- floors_to_xlast = floors->scoreto[-index1interval];
- } else {
- floors_to_xlast = floors->scoreto[query_lastpos-spansize /* -max_end_insertions */];
- }
-#endif
- floors_from_neg3 = floors->scorefrom[-index1interval];
- floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
-
-
- /* Initialize loop */
- batch = heap[1];
- first_querypos = last_querypos = querypos = batch->querypos;
- last_diagonal = diagonal = batch->diagonal;
-
- floor_incr = floors_from_neg3[first_querypos];
-#if 0
- floor = floor_incr;
- floor_xlast = floor_incr;
- floor_xfirst = floors_from_xfirst[first_querypos] /* floors->scorefrom[xfirst_from][first_querypos] */;
-#endif
-
-#ifdef OLD_FLOOR_ENDS
- if (querypos < halfquery_lastpos) {
- floor_left = floor_incr;
- } else {
- floor_left = floors->scorefrom[-index1interval][halfquery_lastpos];
- }
- if (querypos < halfquerylength) {
- floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos];
- } else {
- floor_right = floors->scorefrom[halfquerylength-index1interval][first_querypos];
- }
-#else
- floor_left = floor_incr;
-#ifdef DEBUG1
- floor_right = -99;
-#endif
-#endif
-
-
- debug1(printf("multiple_mm_%s, diagonal %llu, querypos %d\n",
- plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
- debug1(printf("first_querypos = %d => initial values: floor_left %d, floor_right %d\n",
- first_querypos,floor_left,floor_right));
-
- if (--batch->npositions <= 0) {
- /* Use last entry in heap for insertion */
- batch = heap[heapsize];
- querypos = batch->querypos;
- heap[heapsize--] = sentinel;
-
- } else {
- /* Use this batch for insertion (same querypos) */
-#ifdef LARGE_GENOMES
- batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
-#elif defined(WORDS_BIGENDIAN)
- batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
-#else
- batch->diagonal = *(++batch->positions) + batch->diagterm;
-#endif
-#ifdef DIAGONAL_ADD_QUERYPOS
- batch->diagonal_add_querypos = (UINT8) batch->diagonal;
- batch->diagonal_add_querypos <<= 32;
- batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
-#endif
- }
-
- /* heapify */
- parenti = 1;
-#ifdef DIAGONAL_ADD_QUERYPOS
- diagonal_add_querypos = batch->diagonal_add_querypos;
- smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
- while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
- heap[parenti] = heap[smallesti];
- parenti = smallesti;
- smallesti = LEFT(parenti);
- righti = smallesti+1;
- if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
- smallesti = righti;
- }
- }
-#else
- diagonal = batch->diagonal;
- smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
- ((heap[3]->diagonal == heap[2]->diagonal) &&
- (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
- /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
- while (diagonal > heap[smallesti]->diagonal ||
- (diagonal == heap[smallesti]->diagonal &&
- querypos > heap[smallesti]->querypos)) {
- heap[parenti] = heap[smallesti];
- parenti = smallesti;
- smallesti = LEFT(parenti);
- righti = smallesti+1;
- if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
- ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
- (heap[righti]->querypos < heap[smallesti]->querypos))) {
- smallesti = righti;
- }
- }
-#endif
- heap[parenti] = batch;
-
-
- /* Continue after initialization */
- while (heapsize > 0) {
- batch = heap[1];
- querypos = batch->querypos;
- diagonal = batch->diagonal;
-
- if (diagonal == last_diagonal) {
- /* Continuing exact match or substitution */
- floor_incr = floors->scorefrom[last_querypos][querypos];
-#if 0
- floor += floor_incr;
- floor_xfirst += floor_incr;
- floor_xlast += floor_incr;
-#endif
-
-#ifdef OLD_FLOOR_ENDS
- /* Why is this here? Just set floor_left at start and floor_right at end. */
- if (querypos < halfquery_lastpos) {
- floor_left += floor_incr;
- } else if (last_querypos < halfquery_lastpos) {
- /* Finish floor_left */
- floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
- }
- if (querypos >= halfquerylength) {
- if (last_querypos < halfquerylength) {
- /* Start floor_right */
- floor_right = floors->scorefrom[halfquerylength-index1interval][querypos];
- } else {
- floor_right += floor_incr;
- }
- }
-#endif
-
- debug1(printf("diagonal %llu unchanged: last_querypos = %d, querypos = %d => floor increments by %d\n",
- (unsigned long long) diagonal,last_querypos,querypos,floor_incr));
- debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d, floor_left %d, floor_right %d\n",
- plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos,floor_left,floor_right));
- } else {
- /* End of diagonal */
- floor_incr = floors_to_pos3[last_querypos] /* floors->score[last_querypos][query_lastpos+index1interval] */;
-#if 0
- floor += floor_incr;
- floor_xfirst += floor_incr;
- floor_xlast += floors_to_xlast[last_querypos]; /* floors->score[last_querypos][xlast_to]; */
-#endif
-
-#ifdef OLD_FLOOR_ENDS
- if (last_querypos < halfquery_lastpos) {
- floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
- floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos+index1interval];
- }
- if (last_querypos >= halfquerylength) {
- floor_right += floor_incr;
- }
-#else
- floor_right = floor_incr;
-#endif
-
- debug1(printf("new diagonal %llu > last diagonal %llu: last_querypos = %d => final values: floor_left %d, floor_right %d, chrhigh %u\n",
- (unsigned long long) diagonal,(unsigned long long) last_diagonal,
- last_querypos,floor_left,floor_right,chrhigh));
-
- if (last_diagonal > chrhigh) {
- if (ptr > ptr_chrstart) {
- /* Add chr marker segment */
- debug1(printf("=== ptr %p > ptr_chrstart %p, so adding chr marker segment\n",ptr,ptr_chrstart));
- ptr->diagonal = (Univcoord_T) -1;
- ptr_chrstart = ++ptr;
- }
-
- /* update chromosome bounds, based on low end */
-#ifdef SLOW_CHR_UPDATE
- chrnum = Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* chrhigh += 1; */
-#else
- j = 1;
-#ifdef NO_EXTENSIONS_BEFORE_ZERO
- goal = last_diagonal - querylength + 1;
-#else
- goal = last_diagonal + 1;
-#endif
- while (j < nchromosomes_local && chrhighs_local[j] < goal) {
- j <<= 1; /* gallop by 2 */
- }
- if (j >= nchromosomes_local) {
- j = binary_search(j >> 1,nchromosomes_local,chrhighs_local,goal);
- } else {
- j = binary_search(j >> 1,j,chrhighs_local,goal);
- }
- chrnum += j;
-#ifdef DEBUG15
- if (chrnum != Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength)) {
- fprintf(stderr,"Got chrnum %d, but wanted %d\n",
- chrnum,Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength));
- abort();
- }
-#endif
- chroffset = chroffsets[chrnum-1];
- chrhigh = chrhighs[chrnum-1];
- chrlength = chrlengths[chrnum-1];
- chrhighs_local += j;
- nchromosomes_local -= j;
-#endif
- }
- if (last_diagonal <= chrhigh) { /* FORMULA for high position */
- /* position of high end is within current chromosome */
- debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor_left %d, floor_right %d\n",
- (unsigned long long) last_diagonal,first_querypos,last_querypos,
- (unsigned long long) chroffset,(unsigned long long) chrhigh,floor_left,floor_right));
- if (floor_left <= max_mismatches_allowed || floor_right <= max_mismatches_allowed) {
- /* Save segment */
- ptr->diagonal = last_diagonal;
- ptr->chrnum = chrnum;
- ptr->chroffset = chroffset;
- ptr->chrhigh = chrhigh;
- ptr->chrlength = chrlength;
- ptr->querypos5 = first_querypos;
- ptr->querypos3 = last_querypos;
-#if 0
- ptr->floor = floor;
- ptr->floor_xfirst = floor_xfirst;
- ptr->floor_xlast = floor_xlast;
-#endif
- ptr->floor_left = floor_left;
- ptr->floor_right = floor_right;
-#if 0
- ptr->leftmost = ptr->rightmost = -1;
- ptr->left_splice_p = ptr->right_splice_p = false;
- ptr->leftspan = ptr->rightspan = -1;
-#endif
- ptr->usedp = false;
- ptr++;
- }
- }
-
- /* Prepare next diagonal */
- first_querypos = querypos;
- last_diagonal = diagonal;
- floor_incr = floors_from_neg3[first_querypos] /* floors->score[-index1interval][first_querypos] */;
-#if 0
- floor = floor_incr;
- floor_xlast = floor_incr;
- floor_xfirst = floors_from_xfirst[first_querypos]; /* floors->score[xfirst_from][first_querypos]; */
-#endif
-
-#ifdef OLD_FLOOR_ENDS
- if (querypos < halfquery_lastpos) {
- floor_left = floor_incr;
- } else {
- floor_left = floors->scorefrom[-index1interval][halfquery_lastpos];
- }
- if (querypos < halfquerylength) {
- floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos];
- } else {
- floor_right = floors->scorefrom[halfquerylength-index1interval][first_querypos];
- }
-#else
- floor_left = floor_incr;
-#ifdef DEBUG1
- floor_right = -99;
-#endif
-#endif
-
- debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d\n",
- plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
- debug1(printf("start of diagonal %llu, first_querypos = %d => initial values: floor_left %d, floor_right %d\n",
- (unsigned long long) diagonal,first_querypos,floor_left,floor_right));
+ while (pstart != NULL && ((Segment_T) pstart->first)->diagonal < segment3->diagonal) {
+ pstart = List_next(pstart);
}
- last_querypos = querypos;
+ pend = pstart;
+ while (pend != NULL && ((Segment_T) pend->first)->diagonal < segment3->diagonal + pairmax) {
+ pend = List_next(pend);
+ }
- if (--batch->npositions <= 0) {
- /* Use last entry in heap for insertion */
- batch = heap[heapsize];
- querypos = batch->querypos;
- heap[heapsize--] = sentinel;
-
- } else {
- /* Use this batch for insertion (same querypos) */
-#ifdef LARGE_GENOMES
- batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
-#elif defined(WORDS_BIGENDIAN)
- batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
-#else
- batch->diagonal = *(++batch->positions) + batch->diagterm;
-#endif
-#ifdef DIAGONAL_ADD_QUERYPOS
- batch->diagonal_add_querypos = (UINT8) batch->diagonal;
- batch->diagonal_add_querypos <<= 32;
- batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
-#endif
- }
-
- /* heapify */
- parenti = 1;
-#ifdef DIAGONAL_ADD_QUERYPOS
- diagonal_add_querypos = batch->diagonal_add_querypos;
- smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
- while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
- heap[parenti] = heap[smallesti];
- parenti = smallesti;
- smallesti = LEFT(parenti);
- righti = smallesti+1;
- if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
- smallesti = righti;
- }
- }
-#else
- diagonal = batch->diagonal;
- smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
- ((heap[3]->diagonal == heap[2]->diagonal) &&
- (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
- /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
- while (diagonal > heap[smallesti]->diagonal ||
- (diagonal == heap[smallesti]->diagonal &&
- querypos > heap[smallesti]->querypos)) {
- heap[parenti] = heap[smallesti];
- parenti = smallesti;
- smallesti = LEFT(parenti);
- righti = smallesti+1;
- if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
- ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
- (heap[righti]->querypos < heap[smallesti]->querypos))) {
- smallesti = righti;
- }
- }
-#endif
- heap[parenti] = batch;
- }
-
- /* Terminate loop. */
- floor_incr = floors_to_pos3[last_querypos]; /* floors->score[last_querypos][query_lastpos+index1interval]; */
-#if 0
- floor += floor_incr;
- floor_xfirst += floor_incr;
- floor_xlast += floors_to_xlast[last_querypos]; /* floors->score[last_querypos][xlast_to]; */
-#endif
-
-#ifdef OLD_FLOOR_ENDS
- if (last_querypos < halfquery_lastpos) {
- floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
- floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos+index1interval];
- }
- if (last_querypos >= halfquerylength) {
- floor_right += floor_incr;
- }
-#else
- floor_right = floor_incr;
-#endif
-
-
- debug1(printf("no more diagonals: last_querypos = %d => terminal values: floor_left %d, floor_right %d\n",
- last_querypos,floor_left,floor_right));
-
- if (last_diagonal > chrhigh) {
- if (ptr > ptr_chrstart) {
- /* Add chr marker segment */
- debug1(printf("=== ptr %p > ptr_chrstart %p, so adding chr marker segment\n",ptr,ptr_chrstart));
- ptr->diagonal = (Univcoord_T) -1;
- ptr_chrstart = ++ptr;
- }
-
- /* update chromosome bounds, based on low end */
-#ifdef SLOW_CHR_UPDATE
- chrnum = Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* chrhigh += 1; */
-#else
- j = 1;
-#ifdef NO_EXTENSIONS_BEFORE_ZERO
- goal = last_diagonal - querylength + 1;
-#else
- goal = last_diagonal + 1;
-#endif
- while (j < nchromosomes_local && chrhighs_local[j] < goal) {
- j <<= 1; /* gallop by 2 */
- }
- if (j >= nchromosomes_local) {
- j = binary_search(j >> 1,nchromosomes_local,chrhighs_local,goal);
- } else {
- j = binary_search(j >> 1,j,chrhighs_local,goal);
- }
- chrnum += j;
-#ifdef DEBUG15
- if (chrnum != Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength)) {
- fprintf(stderr,"Got chrnum %d, but wanted %d\n",
- chrnum,Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength));
- abort();
- }
-#endif
- chroffset = chroffsets[chrnum-1];
- chrhigh = chrhighs[chrnum-1];
- chrlength = chrlengths[chrnum-1];
- chrhighs_local += j;
- nchromosomes_local -= j;
-#endif
- }
- if (last_diagonal <= chrhigh) { /* FORMULA for high position */
- /* position of high end is within current chromosome */
- debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor_left %d, floor_right %d\n",
- (unsigned long long) last_diagonal,first_querypos,last_querypos,
- (unsigned long long) chroffset,(unsigned long long) chrhigh,floor_left,floor_right));
- if (floor_left <= max_mismatches_allowed || floor_right <= max_mismatches_allowed) {
- /* Save segment */
- ptr->diagonal = last_diagonal;
- ptr->chrnum = chrnum;
- ptr->chroffset = chroffset;
- ptr->chrhigh = chrhigh;
- ptr->chrlength = chrlength;
- ptr->querypos5 = first_querypos;
- ptr->querypos3 = last_querypos;
-#if 0
- ptr->floor = floor;
- ptr->floor_xfirst = floor_xfirst;
- ptr->floor_xlast = floor_xlast;
-#endif
- ptr->floor_left = floor_left;
- ptr->floor_right = floor_right;
-#if 0
- ptr->leftmost = ptr->rightmost = -1;
- ptr->left_splice_p = ptr->right_splice_p = false;
- ptr->leftspan = ptr->rightspan = -1;
-#endif
- ptr->usedp = false;
- ptr++;
- }
- }
-
- if (ptr > ptr_chrstart) {
- /* Final chr marker segment */
- debug1(printf("=== ptr %p > ptr_chrstart %p, so adding final chr marker segment\n",ptr,ptr_chrstart));
- ptr->diagonal = (Univcoord_T) -1;
- /* ptr_chrstart = */ ++ptr;
- }
-
-
- FREEA(heap);
- FREEA(batchpool);
-
- /* Note: segments is in descending diagonal order. Will need to
- reverse before solving middle deletions */
-
- *nsegments = ptr - segments;
- debug1(printf("nsegments = %d\n",*nsegments));
- debug(printf("nsegments = %d (total_npositions = %d)\n",*nsegments,total_npositions));
+ for (p = pstart; p != pend; p = List_next(p)) {
+ segment5 = (Segment_T) List_head(p);
+ assert(segment5->diagonal - segment3->diagonal < pairmax);
+ debug5(printf("Setting minus segments to be pairable: %u and %u (distance %u)\n",
+ segment3->diagonal,segment5->diagonal,segment5->diagonal - segment3->diagonal));
+ segment3->pairablep = true;
+ segment5->pairablep = true;
+ }
+ }
- assert(*nsegments <= total_npositions + nchromosomes);
+ debug(printf("Exiting pair_up_anchor_segments\n"));
- return segments;
+ return;
}
@@ -5047,8 +4838,8 @@ find_middle_indels (int *found_score, int *nhits, List_T hits,
hits = Indel_solve_middle_deletion(&foundp,&(*found_score),&(*nhits),hits,
/*left*/segmenti->diagonal - querylength,
segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- indels,/*query_compress*/query_compress_fwd,
- querylength,max_mismatches_allowed,
+ indels,/*query_compress*/query_compress_fwd,querylength,
+ max_mismatches_allowed,
/*plusp*/true,genestrand,first_read_p,/*sarrayp*/false);
} else {
debug2(printf("too many mismatches, because floor = %d+middle+%d=%d > %d\n",
@@ -5123,8 +4914,8 @@ find_middle_indels (int *found_score, int *nhits, List_T hits,
hits = Indel_solve_middle_deletion(&foundp,&(*found_score),&(*nhits),hits,
/*left*/segmenti->diagonal - querylength,
segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- indels,/*query_compress*/query_compress_rev,
- querylength,max_mismatches_allowed,
+ indels,/*query_compress*/query_compress_rev,querylength,
+ max_mismatches_allowed,
/*plusp*/false,genestrand,first_read_p,/*sarrayp*/false);
} else {
debug2(printf("too many mismatches, because floor = %d+middle+%d=%d > %d\n",
@@ -6437,10 +6228,12 @@ solve_end_indel_high (int *found_score, int *nhits, List_T hits, Segment_T ptr,
}
+/* Note: plus_anchor_segments and minus_anchor_segments point to anchors,
+ but can use smaller segments for the ends because ptr points to all
+ of them */
static List_T
find_end_indels (int *found_score, int *nhits, List_T hits,
- struct Segment_T *plus_segments, struct Segment_T *minus_segments,
- int plus_nsegments, int minus_nsegments,
+ List_T plus_anchor_segments, List_T minus_anchor_segments,
#ifdef DEBUG2E
char *queryuc_ptr, char *queryrc,
#endif
@@ -6449,11 +6242,13 @@ find_end_indels (int *found_score, int *nhits, List_T hits,
int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int indel_penalty_end, int max_mismatches_allowed, int genestrand, bool first_read_p) {
Segment_T ptr;
+ List_T p;
debug(printf("*** find_end_indels with max_mismatches_allowed %d ***\n",
max_mismatches_allowed));
- for (ptr = plus_segments; ptr < &(plus_segments[plus_nsegments]); ptr++) {
+ for (p = plus_anchor_segments; p != NULL; p = List_next(p)) {
+ ptr = (Segment_T) List_head(p);
if (ptr->diagonal < (Univcoord_T) -1) {
if (ptr->floor_xfirst <= max_mismatches_allowed) {
@@ -6484,7 +6279,8 @@ find_end_indels (int *found_score, int *nhits, List_T hits,
}
}
- for (ptr = minus_segments; ptr < &(minus_segments[minus_nsegments]); ptr++) {
+ for (p = minus_anchor_segments; p != NULL; p = List_next(p)) {
+ ptr = (Segment_T) List_head(p);
if (ptr->diagonal < (Univcoord_T) -1) {
if (ptr->floor_xfirst <= max_mismatches_allowed) {
@@ -6694,7 +6490,7 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
Stage3end_T hit, *hitarray;
int n_good_spliceends;
int best_nmismatches, nmismatches, nmismatches_donor, nmismatches_acceptor;
- double best_prob, prob;
+ double best_prob, prob, donor_prob, acceptor_prob;
Substring_T donor, acceptor;
int sensedir;
@@ -6710,230 +6506,214 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
debug4s(printf("*** Starting find_singlesplices_plus on %d spliceable segments ***\n",plus_nspliceable));
/* debug(printf("Initially have %d hits\n",List_length(hits))); */
- floors_from_neg3 = floors->scorefrom[-index1interval];
- floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
-
- for (ptr = plus_spliceable; ptr < &(plus_spliceable[plus_nspliceable]); ptr++) {
- segmenti = *ptr;
- debug4s(printf("plus_spliceable segmenti at diagonal %u\n",segmenti->diagonal));
- if (1 || segmenti->diagonal < (Univcoord_T) -1) { /* No markers were stored in spliceable */
- segmenti_left = segmenti->diagonal - querylength;
- floor_outer_i = floors_from_neg3[segmenti->querypos5];
+ if (floors != NULL) {
+ floors_from_neg3 = floors->scorefrom[-index1interval];
+ floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
- segmenti_donor_nknown = 0;
- segmenti_antiacceptor_nknown = 0;
- max_distance = shortsplicedist;
+ for (ptr = plus_spliceable; ptr < &(plus_spliceable[plus_nspliceable]); ptr++) {
+ segmenti = *ptr;
+ debug4s(printf("plus_spliceable segmenti at diagonal %u\n",segmenti->diagonal));
+ if (1 || segmenti->diagonal < (Univcoord_T) -1) { /* No markers were stored in spliceable */
+ segmenti_left = segmenti->diagonal - querylength;
+ floor_outer_i = floors_from_neg3[segmenti->querypos5];
+
+ segmenti_donor_nknown = 0;
+ segmenti_antiacceptor_nknown = 0;
+ max_distance = shortsplicedist;
+
+ if ((j = segmenti->splicesites_i) >= 0) {
+ /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segmenti */
+ while (j < nsplicesites && splicesites[j] < segmenti->diagonal) {
+ if (splicetypes[j] == DONOR) {
+ debug4s(printf("Setting known donor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
+ segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - segmenti_left;
+ segmenti_donor_knowni[segmenti_donor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIACCEPTOR) {
+ debug4s(printf("Setting known antiacceptor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - segmenti_left;
+ segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
+ }
- if ((j = segmenti->splicesites_i) >= 0) {
- /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segmenti */
- while (j < nsplicesites && splicesites[j] < segmenti->diagonal) {
- if (splicetypes[j] == DONOR) {
- debug4s(printf("Setting known donor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
- segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - segmenti_left;
- segmenti_donor_knowni[segmenti_donor_nknown++] = j;
- } else if (splicetypes[j] == ANTIACCEPTOR) {
- debug4s(printf("Setting known antiacceptor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
- segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - segmenti_left;
- segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
- }
+ /* This computation was already made in identify_all_segments */
+ if (splicedists[j] > max_distance) {
+ debug4s(printf("Setting max_distance for known i %d to be %u\n",j,splicedists[j]));
+ max_distance = splicedists[j];
+ }
- /* This computation was already made in identify_all_segments */
- if (splicedists[j] > max_distance) {
- debug4s(printf("Setting max_distance for known i %d to be %u\n",j,splicedists[j]));
- max_distance = splicedists[j];
+ j++;
}
-
- j++;
}
- }
- segmenti_donor_knownpos[segmenti_donor_nknown] = querylength;
- segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength;
+ segmenti_donor_knownpos[segmenti_donor_nknown] = querylength;
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength;
- /* Identify potential segmentj for segmenti */
- segmentj_end = segmenti+1;
- while (
+ /* Identify potential segmentj for segmenti */
+ segmentj_end = segmenti+1;
+ while (
#ifdef NO_MARKER_SEGMENTS
- segmentj_end < &(plus_segments[plus_nsegments]) && segmentj_end->chrnum == segmenti->chrnum &&
+ segmentj_end < &(plus_segments[plus_nsegments]) && segmentj_end->chrnum == segmenti->chrnum &&
#endif
- segmentj_end->diagonal <= segmenti->diagonal + max_distance) {
- segmentj_end++;
- }
-
- spliceends_sense = spliceends_antisense = (List_T) NULL;
+ segmentj_end->diagonal <= segmenti->diagonal + max_distance) {
+ segmentj_end++;
+ }
- if (segmentj_end - segmenti >= MAX_LOCALSPLICING_POTENTIAL) {
- /* Too many to check */
- /* segmentj_end = segmenti+1 + MAX_LOCALSPLICING_POTENTIAL; */
- segmentj = segmentj_end; /* Don't process any */
- } else {
- segmentj = segmenti+1;
- }
- for ( ; segmentj < segmentj_end; segmentj++) {
- debug4s(printf("plus local? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
- (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
- /* i5 i3 j5 j3 */
- assert(segmenti->diagonal < segmentj->diagonal);
- if (segmenti->querypos3 >= segmentj->querypos5) {
- /* Fail querypos test */
- debug4s(printf("Bad querypos\n"));
-
- } else if (segmenti->diagonal + min_intronlength > segmentj->diagonal) {
- /* Too short to be an intron */
- debug4s(printf("Too short\n"));
+ spliceends_sense = spliceends_antisense = (List_T) NULL;
+ if (segmentj_end - segmenti >= MAX_LOCALSPLICING_POTENTIAL) {
+ /* Too many to check */
+ /* segmentj_end = segmenti+1 + MAX_LOCALSPLICING_POTENTIAL; */
+ segmentj = segmentj_end; /* Don't process any */
} else {
- segmenti->right_splice_p = true;
- segmentj->left_splice_p = true;
- if (floor_outer_i + floors_to_pos3[segmentj->querypos3] > max_mismatches_allowed) {
- /* Fail outer floor test */
- /* floors->score[-index1interval][segmenti->querypos5] +floors->score[segmentj->querypos3][query_lastpos+index1interval] */
-
- debug4s(printf("too many mismatches, outer floor = %d+%d=%d > %d\n",
- floors->scorefrom[-index1interval][segmenti->querypos5],
- floors->scorefrom[segmentj->querypos3][query_lastpos+index1interval],
- floors->scorefrom[-index1interval][segmenti->querypos5] +
- floors->scorefrom[segmentj->querypos3][query_lastpos+index1interval],
- max_mismatches_allowed));
+ segmentj = segmenti+1;
+ }
+ for ( ; segmentj < segmentj_end; segmentj++) {
+ debug4s(printf("plus local? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
+ (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
+ /* i5 i3 j5 j3 */
+ assert(segmenti->diagonal < segmentj->diagonal);
+ if (segmenti->querypos3 >= segmentj->querypos5) {
+ /* Fail querypos test */
+ debug4s(printf("Bad querypos\n"));
+
+ } else if (segmenti->diagonal + min_intronlength > segmentj->diagonal) {
+ /* Too short to be an intron */
+ debug4s(printf("Too short\n"));
} else {
- /* Apply leftmost/rightmost test */
- if (segmenti->leftmost < 0) {
- nmismatches_left = Genome_mismatches_left(mismatch_positions_left,max_mismatches_allowed,
- query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/querylength,
- /*plusp*/true,genestrand,first_read_p);
- segmenti->leftmost = (nmismatches_left == 0) ? 0 : mismatch_positions_left[nmismatches_left-1];
- debug4s(printf("%d mismatches on left at:",nmismatches_left);
- for (i = 0; i <= nmismatches_left; i++) {
- printf(" %d",mismatch_positions_left[i]);
- }
- printf("\n"));
- }
-
- segmentj_left = segmentj->diagonal - querylength;
- if (segmentj->rightmost < 0) {
- nmismatches_right = Genome_mismatches_right(mismatch_positions_right,max_mismatches_allowed,
- query_compress,/*left*/segmentj_left,/*pos5*/0,/*pos3*/querylength,
+ segmenti->right_splice_p = true;
+ segmentj->left_splice_p = true;
+ if (floor_outer_i + floors_to_pos3[segmentj->querypos3] > max_mismatches_allowed) {
+ /* Fail outer floor test */
+ /* floors->score[-index1interval][segmenti->querypos5] +floors->score[segmentj->querypos3][query_lastpos+index1interval] */
+
+ debug4s(printf("too many mismatches, outer floor = %d+%d=%d > %d\n",
+ floors->scorefrom[-index1interval][segmenti->querypos5],
+ floors->scorefrom[segmentj->querypos3][query_lastpos+index1interval],
+ floors->scorefrom[-index1interval][segmenti->querypos5] +
+ floors->scorefrom[segmentj->querypos3][query_lastpos+index1interval],
+ max_mismatches_allowed));
+
+ } else {
+ /* Apply leftmost/rightmost test */
+ if (segmenti->leftmost < 0) {
+ nmismatches_left = Genome_mismatches_left(mismatch_positions_left,max_mismatches_allowed,
+ query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/querylength,
/*plusp*/true,genestrand,first_read_p);
- segmentj->rightmost = (nmismatches_right == 0) ? 0 : mismatch_positions_right[nmismatches_right-1];
- debug4s(printf("%d mismatches on right at:",nmismatches_right);
- for (i = 0; i <= nmismatches_right; i++) {
- printf(" %d",mismatch_positions_right[i]);
- }
- printf("\n"));
- }
+ segmenti->leftmost = (nmismatches_left == 0) ? 0 : mismatch_positions_left[nmismatches_left-1];
+ debug4s(printf("%d mismatches on left at:",nmismatches_left);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions_left[i]);
+ }
+ printf("\n"));
+ }
+
+ segmentj_left = segmentj->diagonal - querylength;
+ if (segmentj->rightmost < 0) {
+ nmismatches_right = Genome_mismatches_right(mismatch_positions_right,max_mismatches_allowed,
+ query_compress,/*left*/segmentj_left,/*pos5*/0,/*pos3*/querylength,
+ /*plusp*/true,genestrand,first_read_p);
+ segmentj->rightmost = (nmismatches_right == 0) ? 0 : mismatch_positions_right[nmismatches_right-1];
+ debug4s(printf("%d mismatches on right at:",nmismatches_right);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions_right[i]);
+ }
+ printf("\n"));
+ }
- debug4s(printf("For a single splice, want leftmost %d > rightmost %d\n",segmenti->leftmost,segmentj->rightmost));
+ debug4s(printf("For a single splice, want leftmost %d > rightmost %d\n",segmenti->leftmost,segmentj->rightmost));
- if (segmenti->leftmost > segmentj->rightmost) {
- /* Single splice is possible */
-
- segmentj_acceptor_nknown = 0;
- segmentj_antidonor_nknown = 0;
- if ((j = segmentj->splicesites_i) >= 0) {
- /* Ends 2 (acceptor, plus) and 7 (antidonor, plus): mark known splice sites in segmentj */
- while (j < nsplicesites && splicesites[j] < segmentj->diagonal) {
- if (splicetypes[j] == ACCEPTOR) {
- debug4s(printf("Setting known acceptor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
- segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - segmentj_left;
- segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
- } else if (splicetypes[j] == ANTIDONOR) {
- debug4s(printf("Setting known antidonor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
- segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - segmentj_left;
- segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
+ if (segmenti->leftmost > segmentj->rightmost) {
+ /* Single splice is possible */
+
+ segmentj_acceptor_nknown = 0;
+ segmentj_antidonor_nknown = 0;
+ if ((j = segmentj->splicesites_i) >= 0) {
+ /* Ends 2 (acceptor, plus) and 7 (antidonor, plus): mark known splice sites in segmentj */
+ while (j < nsplicesites && splicesites[j] < segmentj->diagonal) {
+ if (splicetypes[j] == ACCEPTOR) {
+ debug4s(printf("Setting known acceptor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - segmentj_left;
+ segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIDONOR) {
+ debug4s(printf("Setting known antidonor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - segmentj_left;
+ segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
+ }
+ j++;
}
- j++;
}
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength;
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength;
+
+
+ debug4s(printf(" => checking for single splice: Splice_solve_single_plus\n"));
+ spliceends_sense =
+ Splice_solve_single_sense(&(*found_score),&nhits_local,spliceends_sense,&(*lowprob),
+ &segmenti->usedp,&segmentj->usedp,
+ /*segmenti_left*/segmenti->diagonal - querylength,
+ /*segmentj_left*/segmentj->diagonal - querylength,
+ segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
+ segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ /*plusp*/true,genestrand,first_read_p,subs_or_indels_p,
+ /*sarrayp*/false);
+ spliceends_antisense =
+ Splice_solve_single_antisense(&(*found_score),&nhits_local,spliceends_antisense,&(*lowprob),
+ &segmenti->usedp,&segmentj->usedp,
+ /*segmenti_left*/segmenti->diagonal - querylength,
+ /*segmentj_left*/segmentj->diagonal - querylength,
+ segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
+ segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ /*plusp*/true,genestrand,first_read_p,subs_or_indels_p,
+ /*sarrayp*/false);
}
- segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength;
- segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength;
-
-
- debug4s(printf(" => checking for single splice: Splice_solve_single_plus\n"));
- spliceends_sense =
- Splice_solve_single_sense(&(*found_score),&nhits_local,spliceends_sense,&(*lowprob),
- &segmenti->usedp,&segmentj->usedp,
- /*segmenti_left*/segmenti->diagonal - querylength,
- /*segmentj_left*/segmentj->diagonal - querylength,
- segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,max_mismatches_allowed,
- /*plusp*/true,genestrand,first_read_p,subs_or_indels_p,
- /*sarrayp*/false);
- spliceends_antisense =
- Splice_solve_single_antisense(&(*found_score),&nhits_local,spliceends_antisense,&(*lowprob),
- &segmenti->usedp,&segmentj->usedp,
- /*segmenti_left*/segmenti->diagonal - querylength,
- /*segmentj_left*/segmentj->diagonal - querylength,
- segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,max_mismatches_allowed,
- /*plusp*/true,genestrand,first_read_p,subs_or_indels_p,
- /*sarrayp*/false);
}
}
}
- }
-
- /* Process results for segmenti, sense. Modified from collect_elt_matches in sarray-read.c. */
- if (spliceends_sense != NULL) {
- /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
- best_nmismatches = querylength;
- best_prob = 0.0;
- for (p = spliceends_sense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
- Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
- Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
- if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
- best_nmismatches = nmismatches;
- }
- if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
- best_prob = prob;
- }
- }
- n_good_spliceends = 0;
- accepted_hits = rejected_hits = (List_T) NULL;
- for (p = spliceends_sense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
- Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
- debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ /* Process results for segmenti, sense. Modified from collect_elt_matches in sarray-read.c. */
+ if (spliceends_sense != NULL) {
+ /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
+ best_nmismatches = querylength;
+ best_prob = 0.0;
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
+ Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
+ Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
+ Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
+ Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
- n_good_spliceends += 1;
- accepted_hits = List_push(accepted_hits,(void *) hit);
- } else {
- rejected_hits = List_push(rejected_hits,(void *) hit);
+ if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
+ best_nmismatches = nmismatches;
+ }
+ if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
+ best_prob = prob;
+ }
}
- }
- if (n_good_spliceends == 0) {
- /* Conjunction is too strict. Allow for disjunction instead. */
- List_free(&rejected_hits);
+ n_good_spliceends = 0;
+ accepted_hits = rejected_hits = (List_T) NULL;
for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
+ if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
@@ -6944,231 +6724,231 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
rejected_hits = List_push(rejected_hits,(void *) hit);
}
}
- }
- for (p = rejected_hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- Stage3end_free(&hit);
- }
- List_free(&rejected_hits);
- List_free(&spliceends_sense);
-
- if (n_good_spliceends == 1) {
- hits = List_push(hits,List_head(accepted_hits));
- List_free(&accepted_hits);
-
- } else {
- /* 1. Multiple hits, sense, left1 (segmenti_left) */
- debug7(printf("multiple splice hits, sense, plus\n"));
- donor_hits = acceptor_hits = (List_T) NULL;
+ if (n_good_spliceends == 0) {
+ /* Conjunction is too strict. Allow for disjunction instead. */
+ List_free(&rejected_hits);
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
+ Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
+ debug7(printf("accepting distance %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ n_good_spliceends += 1;
+ accepted_hits = List_push(accepted_hits,(void *) hit);
+ } else {
+ rejected_hits = List_push(rejected_hits,(void *) hit);
+ }
+ }
+ }
- /* plus branch from collect_elt_matches */
- for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ for (p = rejected_hits; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
- if (Substring_genomicstart(donor) == segmenti_left) {
- donor_hits = List_push(donor_hits,(void *) hit);
- } else if (Substring_genomicstart(acceptor) == segmenti_left) {
- acceptor_hits = List_push(acceptor_hits,(void *) hit);
- } else {
- abort();
- Stage3end_free(&hit);
- }
+ Stage3end_free(&hit);
}
+ List_free(&rejected_hits);
+ List_free(&spliceends_sense);
- if (donor_hits != NULL) {
- hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
- i = 0;
- while (i < n) {
- hit = hitarray[i];
+ if (n_good_spliceends == 1) {
+ hits = List_push(hits,List_head(accepted_hits));
+ List_free(&accepted_hits);
+
+ } else {
+ /* 1. Multiple hits, sense, left1 (segmenti_left) */
+ debug7(printf("multiple splice hits, sense, plus\n"));
+ donor_hits = acceptor_hits = (List_T) NULL;
+
+ /* plus branch from collect_elt_matches */
+ for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
- donor_length = Substring_match_length_orig(donor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
- j++;
- }
- if (j == i + 1) {
- hits = List_push(hits,(void *) hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Substring_genomicstart(donor) == segmenti_left) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicstart(acceptor) == segmenti_left) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
} else {
+ abort();
+ Stage3end_free(&hit);
+ }
+ }
+
+ if (donor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ donor = Stage3end_substring_donor(hit);
+ donor_length = Substring_match_length_orig(donor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ hits = List_push(hits,(void *) hit);
+ } else {
#ifdef LARGE_GENOMES
- ambcoords = (Uint8list_T) NULL;
+ ambcoords = (Uint8list_T) NULL;
#else
- ambcoords = (Uintlist_T) NULL;
+ ambcoords = (Uintlist_T) NULL;
#endif
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
- amb_probs = (Doublelist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+ amb_probs = (Doublelist_T) NULL;
- for (k = i; k < j; k++) {
- acceptor = Stage3end_substring_acceptor(hitarray[k]);
+ for (k = i; k < j; k++) {
+ acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ }
- nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(donor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
+ prob = best_prob - donor_prob;
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donor*/NULL,amb_nmismatches,
+ /*amb_probs_donor*/NULL,amb_probs,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false));
+ Doublelist_free(&amb_probs);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords);
+ Uintlist_free(&ambcoords);
#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
}
- }
- i = j;
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&donor_hits);
}
- FREE(hitarray);
- List_free(&donor_hits);
- }
- if (acceptor_hits != NULL) {
- hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
- i = 0;
- while (i < n) {
- hit = hitarray[i];
- acceptor = Stage3end_substring_acceptor(hit);
- acceptor_length = Substring_match_length_orig(acceptor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
- j++;
- }
- if (j == i + 1) {
- hits = List_push(hits,(void *) hit);
- } else {
+ if (acceptor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ acceptor = Stage3end_substring_acceptor(hit);
+ acceptor_length = Substring_match_length_orig(acceptor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ hits = List_push(hits,(void *) hit);
+ } else {
#ifdef LARGE_GENOMES
- ambcoords = (Uint8list_T) NULL;
+ ambcoords = (Uint8list_T) NULL;
#else
- ambcoords = (Uintlist_T) NULL;
+ ambcoords = (Uintlist_T) NULL;
#endif
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
- amb_probs = (Doublelist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+ amb_probs = (Doublelist_T) NULL;
- for (k = i; k < j; k++) {
- donor = Stage3end_substring_donor(hitarray[k]);
+ for (k = i; k < j; k++) {
+ donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ }
- nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
+ prob = best_prob - acceptor_prob;
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ amb_probs,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false));
+ Doublelist_free(&amb_probs);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords);
+ Uintlist_free(&ambcoords);
#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
}
- }
- i = j;
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&acceptor_hits);
}
- FREE(hitarray);
- List_free(&acceptor_hits);
- }
-
- List_free(&accepted_hits);
- }
- }
- /* Process results for segmenti, antisense. Modified from collect_elt_matches in sarray-read.c. */
- if (spliceends_antisense != NULL) {
- /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
- best_nmismatches = querylength;
- best_prob = 0.0;
- for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
- Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
- Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
- if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
- best_nmismatches = nmismatches;
- }
- if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
- best_prob = prob;
+ List_free(&accepted_hits);
}
}
- n_good_spliceends = 0;
- accepted_hits = rejected_hits = (List_T) NULL;
- for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
- Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
- debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
+ /* Process results for segmenti, antisense. Modified from collect_elt_matches in sarray-read.c. */
+ if (spliceends_antisense != NULL) {
+ /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
+ best_nmismatches = querylength;
+ best_prob = 0.0;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
+ Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
+ Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
- n_good_spliceends += 1;
- accepted_hits = List_push(accepted_hits,(void *) hit);
- } else {
- rejected_hits = List_push(rejected_hits,(void *) hit);
+ if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
+ best_nmismatches = nmismatches;
+ }
+ if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
+ best_prob = prob;
+ }
}
- }
- if (n_good_spliceends == 0) {
- /* Conjunction is too strict. Allow for disjunction instead. */
- List_free(&rejected_hits);
+ n_good_spliceends = 0;
+ accepted_hits = rejected_hits = (List_T) NULL;
for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
+ if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
@@ -7181,185 +6961,207 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
rejected_hits = List_push(rejected_hits,(void *) hit);
}
}
- }
- for (p = rejected_hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- Stage3end_free(&hit);
- }
- List_free(&rejected_hits);
- List_free(&spliceends_antisense);
-
- if (n_good_spliceends == 1) {
- hits = List_push(hits,List_head(accepted_hits));
- List_free(&accepted_hits);
-
- } else {
- /* 2. Multiple hits, antisense, left1 (segmenti_left) */
- debug7(printf("multiple splice hits, antisense, plus\n"));
- donor_hits = acceptor_hits = (List_T) NULL;
+ if (n_good_spliceends == 0) {
+ /* Conjunction is too strict. Allow for disjunction instead. */
+ List_free(&rejected_hits);
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
+ Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
+ debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
+ Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ n_good_spliceends += 1;
+ accepted_hits = List_push(accepted_hits,(void *) hit);
+ } else {
+ rejected_hits = List_push(rejected_hits,(void *) hit);
+ }
+ }
+ }
- /* plus branch from collect_elt_matches */
- for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ for (p = rejected_hits; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
- if (Substring_genomicstart(donor) == segmenti_left) {
- donor_hits = List_push(donor_hits,(void *) hit);
- } else if (Substring_genomicstart(acceptor) == segmenti_left) {
- acceptor_hits = List_push(acceptor_hits,(void *) hit);
- } else {
- abort();
- Stage3end_free(&hit);
- }
+ Stage3end_free(&hit);
}
+ List_free(&rejected_hits);
+ List_free(&spliceends_antisense);
- if (donor_hits != NULL) {
- hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
- i = 0;
- while (i < n) {
- hit = hitarray[i];
+ if (n_good_spliceends == 1) {
+ hits = List_push(hits,List_head(accepted_hits));
+ List_free(&accepted_hits);
+
+ } else {
+ /* 2. Multiple hits, antisense, left1 (segmenti_left) */
+ debug7(printf("multiple splice hits, antisense, plus\n"));
+ donor_hits = acceptor_hits = (List_T) NULL;
+
+ /* plus branch from collect_elt_matches */
+ for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
- donor_length = Substring_match_length_orig(donor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
- j++;
- }
- if (j == i + 1) {
- hits = List_push(hits,(void *) hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Substring_genomicstart(donor) == segmenti_left) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicstart(acceptor) == segmenti_left) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
} else {
+ abort();
+ Stage3end_free(&hit);
+ }
+ }
+
+ if (donor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ donor = Stage3end_substring_donor(hit);
+ donor_length = Substring_match_length_orig(donor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ hits = List_push(hits,(void *) hit);
+ } else {
#ifdef LARGE_GENOMES
- ambcoords = (Uint8list_T) NULL;
+ ambcoords = (Uint8list_T) NULL;
#else
- ambcoords = (Uintlist_T) NULL;
+ ambcoords = (Uintlist_T) NULL;
#endif
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
- amb_probs = (Doublelist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+ amb_probs = (Doublelist_T) NULL;
- for (k = i; k < j; k++) {
- acceptor = Stage3end_substring_acceptor(hitarray[k]);
+ for (k = i; k < j; k++) {
+ acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ }
- nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(donor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donort*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
+ prob = best_prob - donor_prob;
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donort*/NULL,amb_nmismatches,
+ /*amb_probs_donor*/NULL,amb_probs,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false));
+ Doublelist_free(&amb_probs);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords);
+ Uintlist_free(&ambcoords);
#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
}
- }
- i = j;
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&donor_hits);
}
- FREE(hitarray);
- List_free(&donor_hits);
- }
- if (acceptor_hits != NULL) {
- hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
- i = 0;
- while (i < n) {
- hit = hitarray[i];
- acceptor = Stage3end_substring_acceptor(hit);
- acceptor_length = Substring_match_length_orig(acceptor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
- j++;
- }
- if (j == i + 1) {
- hits = List_push(hits,(void *) hit);
- } else {
+ if (acceptor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ acceptor = Stage3end_substring_acceptor(hit);
+ acceptor_length = Substring_match_length_orig(acceptor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ hits = List_push(hits,(void *) hit);
+ } else {
#ifdef LARGE_GENOMES
- ambcoords = (Uint8list_T) NULL;
+ ambcoords = (Uint8list_T) NULL;
#else
- ambcoords = (Uintlist_T) NULL;
+ ambcoords = (Uintlist_T) NULL;
#endif
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
- amb_probs = (Doublelist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+ amb_probs = (Doublelist_T) NULL;
- for (k = i; k < j; k++) {
- donor = Stage3end_substring_donor(hitarray[k]);
+ for (k = i; k < j; k++) {
+ donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ }
- nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
+ prob = best_prob - acceptor_prob;
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ amb_probs,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false));
+ Doublelist_free(&amb_probs);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords);
+ Uintlist_free(&ambcoords);
#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
}
- }
- i = j;
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&acceptor_hits);
}
- FREE(hitarray);
- List_free(&acceptor_hits);
- }
- List_free(&accepted_hits);
+ List_free(&accepted_hits);
+ }
}
- }
+ }
}
}
@@ -7415,7 +7217,7 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
Stage3end_T hit, *hitarray;
int n_good_spliceends;
int best_nmismatches, nmismatches, nmismatches_donor, nmismatches_acceptor;
- double best_prob, prob;
+ double best_prob, prob, donor_prob, acceptor_prob;
Substring_T donor, acceptor;
int sensedir;
@@ -7431,229 +7233,213 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
debug4s(printf("*** Starting find_singlesplices_minus on %d spliceable segments ***\n",minus_nspliceable));
/* debug(printf("Initially have %d hits\n",List_length(hits))); */
- floors_from_neg3 = floors->scorefrom[-index1interval];
- floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+ if (floors != NULL) {
+ floors_from_neg3 = floors->scorefrom[-index1interval];
+ floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
- for (ptr = minus_spliceable; ptr < &(minus_spliceable[minus_nspliceable]); ptr++) {
- segmenti = *ptr;
- debug4s(printf("minus_spliceable segmenti at diagonal %u\n",segmenti->diagonal));
- if (1 || segmenti->diagonal < (Univcoord_T) -1) { /* No markers were stored in spliceable */
- segmenti_left = segmenti->diagonal - querylength;
- floor_outer_i = floors_to_pos3[segmenti->querypos3];
-
- segmenti_antiacceptor_nknown = 0;
- segmenti_donor_nknown = 0;
- max_distance = shortsplicedist;
-
- if ((j = segmenti->splicesites_i) >= 0) {
- /* Ends 4 and 5: mark known splice sites in segmenti */
- while (j < nsplicesites && splicesites[j] < segmenti->diagonal) {
- if (splicetypes[j] == ANTIACCEPTOR) {
- debug4s(printf("Setting known antiacceptor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
- segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - segmenti_left;
- segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
- } else if (splicetypes[j] == DONOR) {
- debug4s(printf("Setting known donor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
- segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - segmenti_left;
- segmenti_donor_knowni[segmenti_donor_nknown++] = j;
- }
+ for (ptr = minus_spliceable; ptr < &(minus_spliceable[minus_nspliceable]); ptr++) {
+ segmenti = *ptr;
+ debug4s(printf("minus_spliceable segmenti at diagonal %u\n",segmenti->diagonal));
+ if (1 || segmenti->diagonal < (Univcoord_T) -1) { /* No markers were stored in spliceable */
+ segmenti_left = segmenti->diagonal - querylength;
+ floor_outer_i = floors_to_pos3[segmenti->querypos3];
+
+ segmenti_antiacceptor_nknown = 0;
+ segmenti_donor_nknown = 0;
+ max_distance = shortsplicedist;
+
+ if ((j = segmenti->splicesites_i) >= 0) {
+ /* Ends 4 and 5: mark known splice sites in segmenti */
+ while (j < nsplicesites && splicesites[j] < segmenti->diagonal) {
+ if (splicetypes[j] == ANTIACCEPTOR) {
+ debug4s(printf("Setting known antiacceptor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - segmenti_left;
+ segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
+ } else if (splicetypes[j] == DONOR) {
+ debug4s(printf("Setting known donor %d for segmenti at %llu\n",j,(unsigned long long) splicesites[j]));
+ segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - segmenti_left;
+ segmenti_donor_knowni[segmenti_donor_nknown++] = j;
+ }
- /* This computation was already made in identify_all_segments */
- if (splicedists[j] > max_distance) {
- debug4s(printf("Setting max_distance for known %d to be %u\n",j,splicedists[j]));
- max_distance = splicedists[j];
- }
+ /* This computation was already made in identify_all_segments */
+ if (splicedists[j] > max_distance) {
+ debug4s(printf("Setting max_distance for known %d to be %u\n",j,splicedists[j]));
+ max_distance = splicedists[j];
+ }
- j++;
+ j++;
+ }
}
- }
- segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength;
- segmenti_donor_knownpos[segmenti_donor_nknown] = querylength;
+ segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength;
+ segmenti_donor_knownpos[segmenti_donor_nknown] = querylength;
- /* Identify potential segmentj for segmenti */
- segmentj_end = segmenti+1;
- while (
+ /* Identify potential segmentj for segmenti */
+ segmentj_end = segmenti+1;
+ while (
#ifdef NO_MARKER_SEGMENTS
- segmentj_end < &(minus_segments[minus_nsegments]) && segmentj_end->chrnum == segmenti->chrnum &&
+ segmentj_end < &(minus_segments[minus_nsegments]) && segmentj_end->chrnum == segmenti->chrnum &&
#endif
- segmentj_end->diagonal <= segmenti->diagonal + max_distance) {
- segmentj_end++;
- }
-
+ segmentj_end->diagonal <= segmenti->diagonal + max_distance) {
+ segmentj_end++;
+ }
- spliceends_sense = spliceends_antisense = (List_T) NULL;
- if (segmentj_end - segmenti >= MAX_LOCALSPLICING_POTENTIAL) {
- /* Too many to check */
- /* segmentj_end = segmenti+1 + MAX_LOCALSPLICING_POTENTIAL; */
- segmentj = segmentj_end; /* Don't process any */
- } else {
- segmentj = segmenti+1;
- }
- for ( ; segmentj < segmentj_end; segmentj++) {
- debug4s(printf("minus local? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
- (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
- (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
- /* j5 j3 i5 i3 */
- assert(segmenti->diagonal < segmentj->diagonal);
- if (segmentj->querypos3 >= segmenti->querypos5) {
- /* Fail querypos test */
- debug4s(printf("Bad querypos\n"));
-
- } else if (segmenti->diagonal + min_intronlength > segmentj->diagonal) {
- /* Too short to be an intron */
- debug4s(printf("Too short\n"));
+ spliceends_sense = spliceends_antisense = (List_T) NULL;
+ if (segmentj_end - segmenti >= MAX_LOCALSPLICING_POTENTIAL) {
+ /* Too many to check */
+ /* segmentj_end = segmenti+1 + MAX_LOCALSPLICING_POTENTIAL; */
+ segmentj = segmentj_end; /* Don't process any */
} else {
- segmenti->right_splice_p = true;
- segmentj->left_splice_p = true;
- if (floors_from_neg3[segmentj->querypos5] + floor_outer_i > max_mismatches_allowed) {
- /* Fail outer floor test */
- /* floors->score[-index1interval][segmentj->querypos5] + floors->score[segmenti->querypos3][query_lastpos+index1interval] */;
-
- debug4s(printf("too many mismatches, outer floor = %d+%d=%d > %d\n",
- floors->scorefrom[-index1interval][segmentj->querypos5],
- floors->scorefrom[segmenti->querypos3][query_lastpos+index1interval],
- floors->scorefrom[-index1interval][segmentj->querypos5] +
- floors->scorefrom[segmenti->querypos3][query_lastpos+index1interval],
- max_mismatches_allowed));
+ segmentj = segmenti+1;
+ }
+ for ( ; segmentj < segmentj_end; segmentj++) {
+ debug4s(printf("minus local? diagonal %llu, querypos %d..%d => diagonal %llu, querypos %d..%d => ",
+ (unsigned long long) segmenti->diagonal,segmenti->querypos5,segmenti->querypos3,
+ (unsigned long long) segmentj->diagonal,segmentj->querypos5,segmentj->querypos3));
+ /* j5 j3 i5 i3 */
+ assert(segmenti->diagonal < segmentj->diagonal);
+ if (segmentj->querypos3 >= segmenti->querypos5) {
+ /* Fail querypos test */
+ debug4s(printf("Bad querypos\n"));
+
+ } else if (segmenti->diagonal + min_intronlength > segmentj->diagonal) {
+ /* Too short to be an intron */
+ debug4s(printf("Too short\n"));
} else {
- /* Apply leftmost/rightmost test */
- if (segmenti->leftmost < 0) {
- nmismatches_left = Genome_mismatches_left(mismatch_positions_left,max_mismatches_allowed,
- query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/querylength,
- /*plusp*/false,genestrand,first_read_p);
- segmenti->leftmost = (nmismatches_left == 0) ? 0 : mismatch_positions_left[nmismatches_left-1];
- debug4s(printf("%d mismatches on left at:",nmismatches_left);
- for (i = 0; i <= nmismatches_left; i++) {
- printf(" %d",mismatch_positions_left[i]);
- }
- printf("\n"));
- }
+ segmenti->right_splice_p = true;
+ segmentj->left_splice_p = true;
+ if (floors_from_neg3[segmentj->querypos5] + floor_outer_i > max_mismatches_allowed) {
+ /* Fail outer floor test */
+ /* floors->score[-index1interval][segmentj->querypos5] + floors->score[segmenti->querypos3][query_lastpos+index1interval] */;
+
+ debug4s(printf("too many mismatches, outer floor = %d+%d=%d > %d\n",
+ floors->scorefrom[-index1interval][segmentj->querypos5],
+ floors->scorefrom[segmenti->querypos3][query_lastpos+index1interval],
+ floors->scorefrom[-index1interval][segmentj->querypos5] +
+ floors->scorefrom[segmenti->querypos3][query_lastpos+index1interval],
+ max_mismatches_allowed));
- segmentj_left = segmentj->diagonal - querylength;
- if (segmentj->rightmost < 0) {
- nmismatches_right = Genome_mismatches_right(mismatch_positions_right,max_mismatches_allowed,
- query_compress,/*left*/segmentj_left,/*pos5*/0,/*pos3*/querylength,
+ } else {
+ /* Apply leftmost/rightmost test */
+ if (segmenti->leftmost < 0) {
+ nmismatches_left = Genome_mismatches_left(mismatch_positions_left,max_mismatches_allowed,
+ query_compress,/*left*/segmenti_left,/*pos5*/0,/*pos3*/querylength,
/*plusp*/false,genestrand,first_read_p);
- segmentj->rightmost = (nmismatches_right == 0) ? 0 : mismatch_positions_right[nmismatches_right-1];
- debug4s(printf("%d mismatches on right at:",nmismatches_right);
- for (i = 0; i <= nmismatches_right; i++) {
- printf(" %d",mismatch_positions_right[i]);
- }
- printf("\n"));
- }
+ segmenti->leftmost = (nmismatches_left == 0) ? 0 : mismatch_positions_left[nmismatches_left-1];
+ debug4s(printf("%d mismatches on left at:",nmismatches_left);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions_left[i]);
+ }
+ printf("\n"));
+ }
+
+ segmentj_left = segmentj->diagonal - querylength;
+ if (segmentj->rightmost < 0) {
+ nmismatches_right = Genome_mismatches_right(mismatch_positions_right,max_mismatches_allowed,
+ query_compress,/*left*/segmentj_left,/*pos5*/0,/*pos3*/querylength,
+ /*plusp*/false,genestrand,first_read_p);
+ segmentj->rightmost = (nmismatches_right == 0) ? 0 : mismatch_positions_right[nmismatches_right-1];
+ debug4s(printf("%d mismatches on right at:",nmismatches_right);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions_right[i]);
+ }
+ printf("\n"));
+ }
+
+ debug4s(printf("For a single splice, want leftmost %d > rightmost %d\n",segmenti->leftmost,segmentj->rightmost));
+
+ if (segmenti->leftmost > segmentj->rightmost) {
+ /* Single splice is possible */
- debug4s(printf("For a single splice, want leftmost %d > rightmost %d\n",segmenti->leftmost,segmentj->rightmost));
-
- if (segmenti->leftmost > segmentj->rightmost) {
- /* Single splice is possible */
-
- segmentj_antidonor_nknown = 0;
- segmentj_acceptor_nknown = 0;
- if ((j = segmentj->splicesites_i) >= 0) {
- /* Ends 3 and 6: mark known splice sites in segmentj */
- while (j < nsplicesites && splicesites[j] < segmentj->diagonal) {
- if (splicetypes[j] == ANTIDONOR) {
- debug4s(printf("Setting known antidonor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
- segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - segmentj_left;
- segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
- } else if (splicetypes[j] == ACCEPTOR) {
- debug4s(printf("Setting known acceptor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
- segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - segmentj_left;
- segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
+ segmentj_antidonor_nknown = 0;
+ segmentj_acceptor_nknown = 0;
+ if ((j = segmentj->splicesites_i) >= 0) {
+ /* Ends 3 and 6: mark known splice sites in segmentj */
+ while (j < nsplicesites && splicesites[j] < segmentj->diagonal) {
+ if (splicetypes[j] == ANTIDONOR) {
+ debug4s(printf("Setting known antidonor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - segmentj_left;
+ segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
+ } else if (splicetypes[j] == ACCEPTOR) {
+ debug4s(printf("Setting known acceptor %d for segmentj at %llu\n",j,(unsigned long long) splicesites[j]));
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - segmentj_left;
+ segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
+ }
+ j++;
}
- j++;
}
+ segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength;
+ segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength;
+
+ debug4s(printf(" => checking for single splice: Splice_solve_single_minus\n"));
+ spliceends_sense =
+ Splice_solve_single_sense(&(*found_score),&nhits_local,spliceends_sense,&(*lowprob),
+ &segmenti->usedp,&segmentj->usedp,
+ /*segmenti_left*/segmenti->diagonal - querylength,
+ /*segmentj_left*/segmentj->diagonal - querylength,
+ segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
+ segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ /*plusp*/false,genestrand,first_read_p,subs_or_indels_p,
+ /*sarrayp*/false);
+ spliceends_antisense =
+ Splice_solve_single_antisense(&(*found_score),&nhits_local,spliceends_antisense,&(*lowprob),
+ &segmenti->usedp,&segmentj->usedp,
+ /*segmenti_left*/segmenti->diagonal - querylength,
+ /*segmentj_left*/segmentj->diagonal - querylength,
+ segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
+ segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
+ querylength,query_compress,
+ segmenti_donor_knownpos,segmentj_acceptor_knownpos,
+ segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
+ segmenti_donor_knowni,segmentj_acceptor_knowni,
+ segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
+ segmenti_donor_nknown,segmentj_acceptor_nknown,
+ segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
+ splicing_penalty,max_mismatches_allowed,
+ /*plusp*/false,genestrand,first_read_p,subs_or_indels_p,
+ /*sarrayp*/false);
}
- segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength;
- segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength;
-
- debug4s(printf(" => checking for single splice: Splice_solve_single_minus\n"));
- spliceends_sense =
- Splice_solve_single_sense(&(*found_score),&nhits_local,spliceends_sense,&(*lowprob),
- &segmenti->usedp,&segmentj->usedp,
- /*segmenti_left*/segmenti->diagonal - querylength,
- /*segmentj_left*/segmentj->diagonal - querylength,
- segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,max_mismatches_allowed,
- /*plusp*/false,genestrand,first_read_p,subs_or_indels_p,
- /*sarrayp*/false);
- spliceends_antisense =
- Splice_solve_single_antisense(&(*found_score),&nhits_local,spliceends_antisense,&(*lowprob),
- &segmenti->usedp,&segmentj->usedp,
- /*segmenti_left*/segmenti->diagonal - querylength,
- /*segmentj_left*/segmentj->diagonal - querylength,
- segmenti->chrnum,segmenti->chroffset,segmenti->chrhigh,segmenti->chrlength,
- segmentj->chrnum,segmentj->chroffset,segmentj->chrhigh,segmentj->chrlength,
- querylength,query_compress,
- segmenti_donor_knownpos,segmentj_acceptor_knownpos,
- segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
- segmenti_donor_knowni,segmentj_acceptor_knowni,
- segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
- segmenti_donor_nknown,segmentj_acceptor_nknown,
- segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
- splicing_penalty,max_mismatches_allowed,
- /*plusp*/false,genestrand,first_read_p,subs_or_indels_p,
- /*sarrayp*/false);
}
}
}
- }
-
- /* Process results for segmenti, sense. Modified from collect_elt_matches in sarray-read.c. */
- if (spliceends_sense != NULL) {
- /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
- best_nmismatches = querylength;
- best_prob = 0.0;
- for (p = spliceends_sense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
- Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
- Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
- if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
- best_nmismatches = nmismatches;
- }
- if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
- best_prob = prob;
- }
- }
- n_good_spliceends = 0;
- accepted_hits = rejected_hits = (List_T) NULL;
- for (p = spliceends_sense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
- Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
- debug7(printf("accepting distance %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ /* Process results for segmenti, sense. Modified from collect_elt_matches in sarray-read.c. */
+ if (spliceends_sense != NULL) {
+ /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
+ best_nmismatches = querylength;
+ best_prob = 0.0;
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
+ Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
+ Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
+ Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
+ Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
- n_good_spliceends += 1;
- accepted_hits = List_push(accepted_hits,(void *) hit);
- } else {
- rejected_hits = List_push(rejected_hits,(void *) hit);
+ if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
+ best_nmismatches = nmismatches;
+ }
+ if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
+ best_prob = prob;
+ }
}
- }
- if (n_good_spliceends == 0) {
- /* Conjunction is too strict. Allow for disjunction instead. */
- List_free(&rejected_hits);
+ n_good_spliceends = 0;
+ accepted_hits = rejected_hits = (List_T) NULL;
for (p = spliceends_sense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
+ if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
@@ -7664,231 +7450,231 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
rejected_hits = List_push(rejected_hits,(void *) hit);
}
}
- }
-
- for (p = rejected_hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- Stage3end_free(&hit);
- }
- List_free(&rejected_hits);
- List_free(&spliceends_sense);
- if (n_good_spliceends == 1) {
- hits = List_push(hits,List_head(accepted_hits));
- List_free(&accepted_hits);
-
- } else {
- /* 1. Multiple hits, sense, left1 (segmenti_left) */
- debug7(printf("multiple splice hits, sense, minus\n"));
- donor_hits = acceptor_hits = (List_T) NULL;
+ if (n_good_spliceends == 0) {
+ /* Conjunction is too strict. Allow for disjunction instead. */
+ List_free(&rejected_hits);
+ for (p = spliceends_sense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
+ Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
+ debug7(printf("accepting distance %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ n_good_spliceends += 1;
+ accepted_hits = List_push(accepted_hits,(void *) hit);
+ } else {
+ rejected_hits = List_push(rejected_hits,(void *) hit);
+ }
+ }
+ }
- /* minus branch from collect_elt_matches */
- for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ for (p = rejected_hits; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
- if (Substring_genomicend(donor) == segmenti_left) {
- donor_hits = List_push(donor_hits,(void *) hit);
- } else if (Substring_genomicend(acceptor) == segmenti_left) {
- acceptor_hits = List_push(acceptor_hits,(void *) hit);
- } else {
- abort();
- Stage3end_free(&hit);
- }
+ Stage3end_free(&hit);
}
+ List_free(&rejected_hits);
+ List_free(&spliceends_sense);
- if (donor_hits != NULL) {
- hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
- i = 0;
- while (i < n) {
- hit = hitarray[i];
+ if (n_good_spliceends == 1) {
+ hits = List_push(hits,List_head(accepted_hits));
+ List_free(&accepted_hits);
+
+ } else {
+ /* 1. Multiple hits, sense, left1 (segmenti_left) */
+ debug7(printf("multiple splice hits, sense, minus\n"));
+ donor_hits = acceptor_hits = (List_T) NULL;
+
+ /* minus branch from collect_elt_matches */
+ for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
- donor_length = Substring_match_length_orig(donor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
- j++;
- }
- if (j == i + 1) {
- hits = List_push(hits,(void *) hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Substring_genomicend(donor) == segmenti_left) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicend(acceptor) == segmenti_left) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
} else {
+ abort();
+ Stage3end_free(&hit);
+ }
+ }
+
+ if (donor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ donor = Stage3end_substring_donor(hit);
+ donor_length = Substring_match_length_orig(donor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ hits = List_push(hits,(void *) hit);
+ } else {
#ifdef LARGE_GENOMES
- ambcoords = (Uint8list_T) NULL;
+ ambcoords = (Uint8list_T) NULL;
#else
- ambcoords = (Uintlist_T) NULL;
+ ambcoords = (Uintlist_T) NULL;
#endif
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
- amb_probs = (Doublelist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+ amb_probs = (Doublelist_T) NULL;
- for (k = i; k < j; k++) {
- acceptor = Stage3end_substring_acceptor(hitarray[k]);
+ for (k = i; k < j; k++) {
+ acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ }
- nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(donor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donort*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
+ prob = best_prob - donor_prob;
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donort*/NULL,amb_nmismatches,
+ /*amb_probs_donor*/NULL,amb_probs,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false));
+ Doublelist_free(&amb_probs);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords);
+ Uintlist_free(&ambcoords);
#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
}
- }
- i = j;
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&donor_hits);
}
- FREE(hitarray);
- List_free(&donor_hits);
- }
- if (acceptor_hits != NULL) {
- hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
- i = 0;
- while (i < n) {
- hit = hitarray[i];
- acceptor = Stage3end_substring_acceptor(hit);
- acceptor_length = Substring_match_length_orig(acceptor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
- j++;
- }
- if (j == i + 1) {
- hits = List_push(hits,(void *) hit);
- } else {
+ if (acceptor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ acceptor = Stage3end_substring_acceptor(hit);
+ acceptor_length = Substring_match_length_orig(acceptor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ hits = List_push(hits,(void *) hit);
+ } else {
#ifdef LARGE_GENOMES
- ambcoords = (Uint8list_T) NULL;
+ ambcoords = (Uint8list_T) NULL;
#else
- ambcoords = (Uintlist_T) NULL;
+ ambcoords = (Uintlist_T) NULL;
#endif
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
- amb_probs = (Doublelist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+ amb_probs = (Doublelist_T) NULL;
- for (k = i; k < j; k++) {
- donor = Stage3end_substring_donor(hitarray[k]);
+ for (k = i; k < j; k++) {
+ donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ }
- nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
+ prob = best_prob - acceptor_prob;
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ amb_probs,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false));
+ Doublelist_free(&amb_probs);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords);
+ Uintlist_free(&ambcoords);
#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
}
- }
- i = j;
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&acceptor_hits);
}
- FREE(hitarray);
- List_free(&acceptor_hits);
- }
-
- List_free(&accepted_hits);
- }
- }
- /* Process results for segmenti, antisense. Modified from collect_elt_matches in sarray-read.c. */
- if (spliceends_antisense != NULL) {
- /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
- best_nmismatches = querylength;
- best_prob = 0.0;
- for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
- Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
- Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
- Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
- Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
- Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
- if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
- best_nmismatches = nmismatches;
- }
- if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
- best_prob = prob;
+ List_free(&accepted_hits);
}
}
- n_good_spliceends = 0;
- accepted_hits = rejected_hits = (List_T) NULL;
- for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
- Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
- debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
+ /* Process results for segmenti, antisense. Modified from collect_elt_matches in sarray-read.c. */
+ if (spliceends_antisense != NULL) {
+ /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
+ best_nmismatches = querylength;
+ best_prob = 0.0;
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
+ Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
- Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
+ Stage3end_nmismatches_whole(hit),Substring_chimera_prob(Stage3end_substring_donor(hit)),
Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
- n_good_spliceends += 1;
- accepted_hits = List_push(accepted_hits,(void *) hit);
- } else {
- rejected_hits = List_push(rejected_hits,(void *) hit);
+ if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
+ best_nmismatches = nmismatches;
+ }
+ if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
+ best_prob = prob;
+ }
}
- }
- if (n_good_spliceends == 0) {
- /* Conjunction is too strict. Allow for disjunction instead. */
- List_free(&rejected_hits);
+ n_good_spliceends = 0;
+ accepted_hits = rejected_hits = (List_T) NULL;
for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
+ if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
@@ -7901,184 +7687,206 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
rejected_hits = List_push(rejected_hits,(void *) hit);
}
}
- }
-
- for (p = rejected_hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- Stage3end_free(&hit);
- }
- List_free(&rejected_hits);
- List_free(&spliceends_antisense);
- if (n_good_spliceends == 1) {
- hits = List_push(hits,List_head(accepted_hits));
- List_free(&accepted_hits);
-
- } else {
- /* 2. Multiple hits, antisense, left1 (segmenti_left) */
- debug7(printf("multiple splice hits, antisense, minus\n"));
- donor_hits = acceptor_hits = (List_T) NULL;
+ if (n_good_spliceends == 0) {
+ /* Conjunction is too strict. Allow for disjunction instead. */
+ List_free(&rejected_hits);
+ for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
+ Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
+ debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
+ Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
+ Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
+ Substring_chimera_prob(Stage3end_substring_donor(hit)),
+ Substring_chimera_prob(Stage3end_substring_acceptor(hit))));
+ n_good_spliceends += 1;
+ accepted_hits = List_push(accepted_hits,(void *) hit);
+ } else {
+ rejected_hits = List_push(rejected_hits,(void *) hit);
+ }
+ }
+ }
- /* minus branch from collect_elt_matches */
- for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ for (p = rejected_hits; p != NULL; p = List_next(p)) {
hit = (Stage3end_T) List_head(p);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
- if (Substring_genomicend(donor) == segmenti_left) {
- donor_hits = List_push(donor_hits,(void *) hit);
- } else if (Substring_genomicend(acceptor) == segmenti_left) {
- acceptor_hits = List_push(acceptor_hits,(void *) hit);
- } else {
- abort();
- Stage3end_free(&hit);
- }
+ Stage3end_free(&hit);
}
+ List_free(&rejected_hits);
+ List_free(&spliceends_antisense);
- if (donor_hits != NULL) {
- hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
- i = 0;
- while (i < n) {
- hit = hitarray[i];
+ if (n_good_spliceends == 1) {
+ hits = List_push(hits,List_head(accepted_hits));
+ List_free(&accepted_hits);
+
+ } else {
+ /* 2. Multiple hits, antisense, left1 (segmenti_left) */
+ debug7(printf("multiple splice hits, antisense, minus\n"));
+ donor_hits = acceptor_hits = (List_T) NULL;
+
+ /* minus branch from collect_elt_matches */
+ for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
donor = Stage3end_substring_donor(hit);
- donor_length = Substring_match_length_orig(donor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
- j++;
- }
- if (j == i + 1) {
- hits = List_push(hits,(void *) hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Substring_genomicend(donor) == segmenti_left) {
+ donor_hits = List_push(donor_hits,(void *) hit);
+ } else if (Substring_genomicend(acceptor) == segmenti_left) {
+ acceptor_hits = List_push(acceptor_hits,(void *) hit);
} else {
+ abort();
+ Stage3end_free(&hit);
+ }
+ }
+
+ if (donor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ donor = Stage3end_substring_donor(hit);
+ donor_length = Substring_match_length_orig(donor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ hits = List_push(hits,(void *) hit);
+ } else {
#ifdef LARGE_GENOMES
- ambcoords = (Uint8list_T) NULL;
+ ambcoords = (Uint8list_T) NULL;
#else
- ambcoords = (Uintlist_T) NULL;
+ ambcoords = (Uintlist_T) NULL;
#endif
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
- amb_probs = (Doublelist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+ amb_probs = (Doublelist_T) NULL;
- for (k = i; k < j; k++) {
- acceptor = Stage3end_substring_acceptor(hitarray[k]);
+ for (k = i; k < j; k++) {
+ acceptor = Stage3end_substring_acceptor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(acceptor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(acceptor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
+ amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(acceptor));
+ }
- nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
- prob = best_prob - Substring_chimera_prob(donor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
- donor,/*acceptor*/NULL,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donort*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
+ nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
+ donor_prob = Junction_donor_prob(Stage3end_junctionA(hit));
+ prob = best_prob - donor_prob;
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
+ donor,/*acceptor*/NULL,donor_prob,/*acceptor_prob*/prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(acceptor),/*amb_prob*/prob,
+ /*ambcoords_donor*/NULL,ambcoords,
+ /*amb_knowni_donor*/NULL,amb_knowni,
+ /*amb_nmismatches_donort*/NULL,amb_nmismatches,
+ /*amb_probs_donor*/NULL,amb_probs,
+ /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false));
+ Doublelist_free(&amb_probs);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords);
+ Uintlist_free(&ambcoords);
#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
}
- }
-
- i = j;
- }
- FREE(hitarray);
- List_free(&donor_hits);
- }
- if (acceptor_hits != NULL) {
- hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
- qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
- i = 0;
- while (i < n) {
- hit = hitarray[i];
- acceptor = Stage3end_substring_acceptor(hit);
- acceptor_length = Substring_match_length_orig(acceptor);
- j = i + 1;
- while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
- j++;
+ i = j;
}
- if (j == i + 1) {
- hits = List_push(hits,(void *) hit);
- } else {
+ FREE(hitarray);
+ List_free(&donor_hits);
+ }
+
+ if (acceptor_hits != NULL) {
+ hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
+ qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
+ i = 0;
+ while (i < n) {
+ hit = hitarray[i];
+ acceptor = Stage3end_substring_acceptor(hit);
+ acceptor_length = Substring_match_length_orig(acceptor);
+ j = i + 1;
+ while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
+ j++;
+ }
+ if (j == i + 1) {
+ hits = List_push(hits,(void *) hit);
+ } else {
#ifdef LARGE_GENOMES
- ambcoords = (Uint8list_T) NULL;
+ ambcoords = (Uint8list_T) NULL;
#else
- ambcoords = (Uintlist_T) NULL;
+ ambcoords = (Uintlist_T) NULL;
#endif
- amb_knowni = (Intlist_T) NULL;
- amb_nmismatches = (Intlist_T) NULL;
- amb_probs = (Doublelist_T) NULL;
+ amb_knowni = (Intlist_T) NULL;
+ amb_nmismatches = (Intlist_T) NULL;
+ amb_probs = (Doublelist_T) NULL;
- for (k = i; k < j; k++) {
- donor = Stage3end_substring_donor(hitarray[k]);
+ for (k = i; k < j; k++) {
+ donor = Stage3end_substring_donor(hitarray[k]);
#ifdef LARGE_GENOMES
- ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uint8list_push(ambcoords,Substring_splicecoord(donor));
#else
- ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
+ ambcoords = Uintlist_push(ambcoords,Substring_splicecoord(donor));
#endif
- amb_knowni = Intlist_push(amb_knowni,-1);
- amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
- amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
- }
+ amb_knowni = Intlist_push(amb_knowni,-1);
+ amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
+ amb_probs = Doublelist_push(amb_probs,Substring_chimera_prob(donor));
+ }
- nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
- prob = best_prob - Substring_chimera_prob(acceptor);
- *ambiguous = List_push(*ambiguous,
- (void *) Stage3end_new_splice(&(*found_score),
- nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
- /*donor*/NULL,acceptor,/*distance*/0U,
- /*shortdistancep*/false,/*penalty*/0,querylength,
- /*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
- Stage3end_sensedir(hit),/*sarrayp*/false));
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
+ nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
+ acceptor_prob = Junction_acceptor_prob(Stage3end_junctionD(hit));
+ prob = best_prob - acceptor_prob;
+ *ambiguous = List_push(*ambiguous,
+ (void *) Stage3end_new_splice(&(*found_score),
+ nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
+ /*donor*/NULL,acceptor,/*donor_prob*/prob,acceptor_prob,/*distance*/0U,
+ /*shortdistancep*/false,/*penalty*/0,querylength,
+ /*amb_length*/Substring_match_length_orig(donor),/*amb_prob*/prob,
+ ambcoords,/*ambcoords_acceptor*/NULL,
+ amb_knowni,/*amb_knowni_acceptor*/NULL,
+ amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
+ amb_probs,/*amb_probs_acceptor*/NULL,
+ /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
+ Stage3end_sensedir(hit),/*sarrayp*/false));
+ Doublelist_free(&amb_probs);
+ Intlist_free(&amb_nmismatches);
+ Intlist_free(&amb_knowni);
#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
+ Uint8list_free(&ambcoords);
#else
- Uintlist_free(&ambcoords);
+ Uintlist_free(&ambcoords);
#endif
- for (k = i; k < j; k++) {
- hit = hitarray[k];
- Stage3end_free(&hit);
+ for (k = i; k < j; k++) {
+ hit = hitarray[k];
+ Stage3end_free(&hit);
+ }
}
- }
- i = j;
+ i = j;
+ }
+ FREE(hitarray);
+ List_free(&acceptor_hits);
}
- FREE(hitarray);
- List_free(&acceptor_hits);
- }
- List_free(&accepted_hits);
+ List_free(&accepted_hits);
+ }
}
- }
+ }
}
}
@@ -8175,12 +7983,11 @@ substringA_match_length_cmp (const void *a, const void *b) {
static List_T
find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
Segment_T *spliceable, int nspliceable, struct Segment_T *segments,
- char *queryptr, Floors_T floors,
- int querylength, int query_lastpos, Compress_T query_compress,
+ char *queryptr, int querylength, int query_lastpos, Compress_T query_compress,
Chrpos_T max_distance, int splicing_penalty, int min_shortend,
int max_mismatches_allowed, bool pairedp, bool first_read_p,
bool plusp, int genestrand, bool subs_or_indels_p) {
- int j, j1, j2, joffset, k, l, jj;
+ int j, j1, j2, joffset, jj;
Segment_T segmenti, segmentj, segmentm, segmenti_start, segmentj_end, *ptr;
List_T potentiali, potentialj, q, r;
@@ -8226,9 +8033,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
Intlist_T splicesites_i_left, splicesites_i_right;
Intlist_T nmismatches_list_left, nmismatches_list_right;
bool ambp_left, ambp_right;
- bool sensep;
int sensedir;
- int *floors_from_neg3, *floors_to_pos3;
+ /* int *floors_from_neg3, *floors_to_pos3; */
int nmismatches_shortexon_left, nmismatches_shortexon_middle, nmismatches_shortexon_right;
int amb_length_donor, amb_length_acceptor;
@@ -8243,7 +8049,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
List_T spliceends, p;
Stage3end_T hit, *hitarray;
int best_nmismatches, nmismatches;
- int n_good_spliceends, n, i;
+ int n_good_spliceends, n, i, k;
double best_prob, prob;
Univcoord_T lastpos;
Intlist_T donor_amb_knowni, acceptor_amb_knowni, donor_amb_nmismatches, acceptor_amb_nmismatches;
@@ -8253,8 +8059,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug(printf("*** Starting find_known_doublesplices on %d segments ***\n",nspliceable));
debug(printf("Initially have %d hits\n",List_length(hits)));
- floors_from_neg3 = floors->scorefrom[-index1interval];
- floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+ /* floors_from_neg3 = floors->scorefrom[-index1interval]; */
+ /* floors_to_pos3 = floors->scoreto[query_lastpos+index1interval]; */
for (ptr = spliceable; ptr < &(spliceable[nspliceable]); ptr++) {
segmentm = *ptr;
@@ -8633,10 +8439,12 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
}
}
- shortexon = Stage3end_substring1(hitarray[i]);
+ shortexon = Stage3end_substringS(hitarray[i]);
sensedir = Stage3end_sensedir(hitarray[i]);
if (Intlist_length(donor_amb_nmismatches) > 1 && Intlist_length(acceptor_amb_nmismatches) > 1) {
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
+ /*donor_prob*/Doublelist_max(donor_amb_probs),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),/*acceptor_prob*/Doublelist_max(acceptor_amb_probs),
/*amb_length_donor*/donor_length,/*amb_length_acceptor*/acceptor_length,
/*amb_prob_donor*/Doublelist_max(donor_amb_probs),/*amb_prob_acceptor*/Doublelist_max(acceptor_amb_probs),
donor_ambcoords,acceptor_ambcoords,
@@ -8644,10 +8452,12 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
donor_amb_nmismatches,acceptor_amb_nmismatches,
donor_amb_probs,acceptor_amb_probs,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
} else if (Intlist_length(donor_amb_nmismatches) > 1) {
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
+ /*donor_prob*/Doublelist_max(donor_amb_probs),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),/*acceptor_prob*/Substring_chimera_prob(acceptor),
/*amb_length_donor*/donor_length,/*amb_length_acceptor*/0,
/*amb_prob_donor*/Doublelist_max(donor_amb_probs),/*amb_length_acceptor*/0.0,
donor_ambcoords,/*acceptor_ambcoords*/NULL,
@@ -8655,10 +8465,12 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
donor_amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
donor_amb_probs,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
} else if (Intlist_length(acceptor_amb_nmismatches) > 1) {
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
+ /*donor_prob*/Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),/*acceptor_prob*/Doublelist_max(acceptor_amb_probs),
/*amb_length_donor*/0,/*amb_length_acceptor*/acceptor_length,
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/Doublelist_max(acceptor_amb_probs),
/*ambcoords_donor*/NULL,acceptor_ambcoords,
@@ -8666,11 +8478,13 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,acceptor_amb_nmismatches,
/*amb_probs_donor*/NULL,acceptor_amb_probs,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
} else {
/* A singleton, apparently due to many duplicates. Is this possible? */
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
+ /*donor_prob*/Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),/*acceptor_prob*/Substring_chimera_prob(acceptor),
/*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
@@ -8678,7 +8492,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/true,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
}
@@ -8741,9 +8555,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
acceptor_amb_probs = Doublelist_push(acceptor_amb_probs,Substring_chimera_prob(acceptor));
}
- shortexon = Stage3end_substring1(hitarray[i]);
+ shortexon = Stage3end_substringS(hitarray[i]);
sensedir = Stage3end_sensedir(hitarray[i]);
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
+ /*donor_prob*/Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),/*acceptor_prob*/Doublelist_max(acceptor_amb_probs),
/*amb_length_donor*/0,/*amb_length_acceptor*/Substring_match_length_orig(acceptor),
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/Doublelist_max(acceptor_amb_probs),
/*ambcoords_donor*/NULL,acceptor_ambcoords,
@@ -8751,7 +8567,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,acceptor_amb_nmismatches,
/*amb_probs_donor*/NULL,acceptor_amb_probs,
/*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
Doublelist_free(&acceptor_amb_probs);
Intlist_free(&acceptor_amb_nmismatches);
Intlist_free(&acceptor_amb_knowni);
@@ -8805,9 +8621,11 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
donor_amb_probs = Doublelist_push(donor_amb_probs,Substring_chimera_prob(donor));
}
- shortexon = Stage3end_substring1(hitarray[i]);
+ shortexon = Stage3end_substringS(hitarray[i]);
sensedir = Stage3end_sensedir(hitarray[i]);
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
+ /*donor_prob*/Doublelist_max(donor_amb_probs),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),/*acceptor_prob*/Substring_chimera_prob(acceptor),
/*amb_length_donor*/Substring_match_length_orig(donor),/*amb_length_acceptor*/0,
/*amb_prob_donor*/Doublelist_max(donor_amb_probs),/*amb_prob_acceptor*/0.0,
donor_ambcoords,/*acceptor_ambcoords*/NULL,
@@ -8815,7 +8633,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
donor_amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
donor_amb_probs,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
Doublelist_free(&donor_amb_probs);
Intlist_free(&donor_amb_nmismatches);
Intlist_free(&donor_amb_knowni);
@@ -8841,7 +8659,6 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/* Short exon using known splicing, originally on plus strand */
if (shortexon_orig_plusp == true) {
debug4k(printf("Short exon candidate, orig_plusp. Saw short exon acceptor...donor on segment i\n"));
- sensep = (plusp == true) ? true : false;
sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
for (j1 = joffset; j1 < j; j1++) {
@@ -8884,7 +8701,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,/*acceptor_ambp*/true,/*donor_ambp*/true,
+ sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon != NULL) {
debug4k(printf("New one-third shortexon at left %llu\n",(unsigned long long) segmentm_left));
@@ -8894,6 +8711,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
amb_length_acceptor = querylength - rightpos /*- nmismatches_shortexon_right*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
+ Doublelist_max(probs_donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor),
amb_length_donor,amb_length_acceptor,
/*amb_prob_donor*/2.0,/*amb_prob_acceptor*/2.0,
ambcoords_donor,ambcoords_acceptor,
@@ -8901,7 +8720,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/nmismatches_list_right,
/*amb_probs_donor*/probs_donor,/*amb_nmismatches_acceptor*/probs_acceptor,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
Doublelist_free(&probs_donor);
Doublelist_free(&probs_acceptor);
#ifdef LARGE_GENOMES
@@ -8926,14 +8745,14 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,/*acceptor_ambp*/true,/*donor_ambp*/false,
+ sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
debug4k(printf("acceptor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j,
/*splice_pos*/rightpos,nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
+ query_compress,querylength,plusp,genestrand,first_read_p,sensedir,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon == NULL || acceptor == NULL) {
@@ -8946,6 +8765,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
amb_length_donor = leftpos /*- nmismatches_shortexon_left*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
+ Doublelist_max(probs_donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor),
amb_length_donor,/*amb_length_acceptor*/0,
/*amb_prob_donor*/2.0,/*amb_length_acceptor*/0,
ambcoords_donor,/*ambcoords_acceptor*/NULL,
@@ -8953,7 +8774,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/nmismatches_list_left,/*amb_nmismatches_acceptor*/NULL,
/*amb_probs_donor*/probs_donor,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
Doublelist_free(&probs_donor);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords_donor);
@@ -8970,7 +8791,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j,
/*splice_pos*/leftpos,nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
+ query_compress,querylength,plusp,genestrand,first_read_p,sensedir,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
debug4k(printf("shortexon with acceptor at %d (%llu) ... amb_donor %d (%llu)\n",
@@ -8982,7 +8803,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,/*acceptor_ambp*/false,/*donor_ambp*/true,
+ sensedir,/*acceptor_ambp*/false,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (donor == NULL || shortexon == NULL) {
@@ -8993,6 +8814,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
amb_length_acceptor = querylength - rightpos /*- nmismatches_shortexon_right*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
+ Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor),
/*amb_length_donor*/0,amb_length_acceptor,
/*amb_prob_donor*/0.0,/*amb_length_acceptor*/2.0,
/*ambcoords_donor*/NULL,ambcoords_acceptor,
@@ -9000,7 +8823,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_right,
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_acceptor,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
Doublelist_free(&probs_acceptor);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords_acceptor);
@@ -9017,7 +8840,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
donor = Substring_new_donor(/*donor_coord*/splicesites[best_left_j],/*donor_knowni*/best_left_j,
/*splice_pos*/leftpos,nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
+ query_compress,querylength,plusp,genestrand,first_read_p,sensedir,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j1],/*acceptor_knowni*/j1,
@@ -9026,13 +8849,13 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,/*acceptor_ambp*/false,/*donor_ambp*/false,
+ sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_right_j],/*acceptor_knowni*/best_right_j,
/*splice_pos*/rightpos,nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
+ query_compress,querylength,plusp,genestrand,first_read_p,sensedir,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (donor == NULL || shortexon == NULL || acceptor == NULL) {
@@ -9043,6 +8866,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("New shortexon at left %llu\n",(unsigned long long) segmentm_left));
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
+ Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor),
/*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
@@ -9050,7 +8875,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
}
}
Intlist_free(&nmismatches_list_right);
@@ -9069,7 +8894,6 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/* Short exon using known splicing, originally on minus strand */
if (shortexon_orig_minusp == true) {
debug4k(printf("Short exon candidate, orig_minusp. Saw short exon antidonor...antiacceptor on segment i\n"));
- sensep = (plusp == true) ? false : true;
sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
for (j1 = joffset; j1 < j; j1++) {
@@ -9112,7 +8936,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,/*acceptor_ambp*/true,/*donor_ambp*/true,
+ sensedir,/*acceptor_ambp*/true,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon != NULL) {
debug4k(printf("New one-third shortexon at left %llu\n",(unsigned long long) segmentm_left));
@@ -9122,6 +8946,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
amb_length_acceptor = leftpos /*- nmismatches_shortexon_left*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,/*acceptor*/NULL,shortexon,
+ Doublelist_max(probs_donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor),
amb_length_donor,amb_length_acceptor,
/*amb_prob_donor*/2.0,/*amb_prob_acceptor*/2.0,
ambcoords_donor,ambcoords_acceptor,
@@ -9129,7 +8955,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/nmismatches_list_left,
/*amb_probs_donor*/probs_donor,/*amb_probs_acceptor*/probs_acceptor,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
Doublelist_free(&probs_donor);
Doublelist_free(&probs_acceptor);
#ifdef LARGE_GENOMES
@@ -9153,14 +8979,14 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,/*acceptor_ambp*/false,/*donor_ambp*/true,
+ sensedir,/*acceptor_ambp*/false,/*donor_ambp*/true,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
debug4k(printf("donor at %d (%llu)\n",best_right_j,(unsigned long long) splicesites[best_right_j]));
donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j,
/*splice_pos*/rightpos,nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
+ query_compress,querylength,plusp,genestrand,first_read_p,sensedir,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (donor == NULL || shortexon == NULL) {
@@ -9171,6 +8997,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
amb_length_acceptor = leftpos /*- nmismatches_shortexon_left*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,/*acceptor*/NULL,shortexon,
+ Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),Doublelist_max(probs_acceptor),
/*amb_length_donor*/0,amb_length_acceptor,
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/2.0,
/*ambcoords_donor*/NULL,ambcoords_acceptor,
@@ -9178,7 +9006,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/nmismatches_list_left,
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/probs_acceptor,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
Doublelist_free(&probs_acceptor);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords_acceptor);
@@ -9195,7 +9023,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j,
/*splice_pos*/leftpos,nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
+ query_compress,querylength,plusp,genestrand,first_read_p,sensedir,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
debug4k(printf("shortexon with donor at %d (%llu) ... amb_acceptor at %d (%llu)\n",
@@ -9206,7 +9034,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,/*acceptor_ambp*/true,/*donor_ambp*/false,
+ sensedir,/*acceptor_ambp*/true,/*donor_ambp*/false,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (shortexon == NULL || acceptor == NULL) {
@@ -9219,6 +9047,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
amb_length_donor = querylength - rightpos /*- nmismatches_shortexon_right*/;
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),/*donor*/NULL,acceptor,shortexon,
+ Doublelist_max(probs_donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor),
amb_length_donor,/*amb_length_acceptor*/0,
/*amb_prob_donor*/2.0,/*amb_prob_acceptor*/0.0,
ambcoords_donor,/*ambcoords_acceptor*/NULL,
@@ -9226,7 +9056,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/nmismatches_list_right,/*amb_nmismatches_acceptor*/NULL,
/*amb_probs_donor*/probs_donor,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
Doublelist_free(&probs_donor);
#ifdef LARGE_GENOMES
Uint8list_free(&ambcoords_donor);
@@ -9241,7 +9071,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[best_left_j],/*acceptor_knowni*/best_left_j,
/*splice_pos*/leftpos,nmismatches_shortexon_left,
/*prob*/2.0,/*left*/splicesites[best_left_j]-leftpos,
- query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
+ query_compress,querylength,plusp,genestrand,first_read_p,sensedir,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
shortexon = Substring_new_shortexon(/*acceptor_coord*/splicesites[j2],/*acceptor_knowni*/j2,
@@ -9250,13 +9080,13 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
nmismatches_shortexon_middle,/*acceptor_prob*/2.0,/*donor_prob*/2.0,
/*left*/segmentm_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,/*acceptor_ambp*/false,/*donor_ambp*/false,
+ sensedir,/*acceptor_ambp*/false,/*donor_ambp*/false,
segmentm->chrnum,segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
donor = Substring_new_donor(/*donor_coord*/splicesites[best_right_j],/*donor_knowni*/best_right_j,
/*splice_pos*/rightpos,nmismatches_shortexon_right,
/*prob*/2.0,/*left*/splicesites[best_right_j]-rightpos,
- query_compress,querylength,plusp,genestrand,first_read_p,sensep,segmentm->chrnum,
+ query_compress,querylength,plusp,genestrand,first_read_p,sensedir,segmentm->chrnum,
segmentm->chroffset,segmentm->chrhigh,segmentm->chrlength);
if (acceptor == NULL || shortexon == NULL || donor == NULL) {
@@ -9267,6 +9097,8 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
debug4k(printf("New shortexon at left %llu\n",(unsigned long long) segmentm_left));
segmentm->usedp = true;
hits = List_push(hits,(void *) Stage3end_new_shortexon(&(*found_score),donor,acceptor,shortexon,
+ Substring_chimera_prob(donor),Substring_siteA_prob(shortexon),
+ Substring_siteD_prob(shortexon),Substring_chimera_prob(acceptor),
/*amb_length_donor*/0,/*amb_length_acceptor*/0,
/*amb_prob_donor*/0.0,/*amb_prob_acceptor*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
@@ -9274,7 +9106,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
/*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
/*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
/*copy_donor_p*/false,/*copy_acceptor_p*/false,/*copy_shortexon_p*/false,
- splicing_penalty,querylength,sensedir,/*sarrayp*/false));
+ splicing_penalty,querylength,first_read_p,sensedir,/*sarrayp*/false));
}
}
Intlist_free(&nmismatches_list_right);
@@ -9303,7 +9135,7 @@ find_doublesplices (int *found_score, List_T hits, List_T *lowprob,
static void
find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors,
List_T **shortend_acceptors, List_T **shortend_antiacceptors,
- struct Segment_T *segments, int nsegments,
+ List_T anchor_segments,
#ifdef DEBUG4E
char *queryptr,
#endif
@@ -9313,6 +9145,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
char *gbuffer;
#endif
+ List_T p;
Segment_T segment;
Substring_T hit;
Univcoord_T segment_left;
@@ -9327,7 +9160,7 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
int nmismatches_left, nmismatches_right;
int *floors_from_neg3, *floors_to_pos3;
- bool sensep;
+ int sensedir;
int splice_pos_start, splice_pos_end;
#ifdef DEBUG4E
@@ -9336,15 +9169,14 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
debug4e(printf("Entering find_spliceends_shortend with %d segments\n",nsegments));
- if (nsegments > 0) {
+ if (floors != NULL) {
floors_from_neg3 = floors->scorefrom[-index1interval];
floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
- for (segment = segments; segment < &(segments[nsegments]); segment++) {
- if (segment->diagonal == (Univcoord_T) -1) {
- /* Skip chr marker segment */
-
- } else if (segment->splicesites_i >= 0) {
+ for (p = anchor_segments; p != NULL; p = List_next(p)) {
+ segment = (Segment_T) List_head(p);
+ assert(segment->diagonal != (Univcoord_T) -1);
+ if (segment->splicesites_i >= 0) {
segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
debug4e(printf("find_spliceends_shortend: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
max_mismatches_allowed,(unsigned long long) segment->diagonal,
@@ -9416,139 +9248,495 @@ find_spliceends_shortend (List_T **shortend_donors, List_T **shortend_antidonors
debug4e(printf("Known donor #%d at querypos %d\n",j,splicesites[j] - segment_left));
debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
segment_left,(unsigned long long) splice_pos,nmismatches,splice_pos_end));
- sensep = (plusp == true) ? true : false;
+ sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,splice_pos,nmismatches,
/*prob*/2.0,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
+ sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s donor: known at %d (%d mismatches)\n",
plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
(*shortend_donors)[nmismatches] = List_push((*shortend_donors)[nmismatches],(void *) hit);
}
- } else if (splicetypes[j] == ANTIACCEPTOR) {
- debug4e(printf("Known antiacceptor #%d at querypos %d\n",j,splicesites[j] - segment_left));
- debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- segment_left,(unsigned long long) splice_pos,nmismatches,splice_pos_end));
- sensep = (plusp == true) ? false : true;
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s antiacceptor : known at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
- (*shortend_antiacceptors)[nmismatches] = List_push((*shortend_antiacceptors)[nmismatches],(void *) hit);
- }
- }
- }
- }
+ } else if (splicetypes[j] == ANTIACCEPTOR) {
+ debug4e(printf("Known antiacceptor #%d at querypos %d\n",j,splicesites[j] - segment_left));
+ debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ segment_left,(unsigned long long) splice_pos,nmismatches,splice_pos_end));
+ sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
+ splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antiacceptor : known at %d (%d mismatches)\n",
+ plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
+ (*shortend_antiacceptors)[nmismatches] = List_push((*shortend_antiacceptors)[nmismatches],(void *) hit);
+ }
+ }
+ }
+ }
+
+ /* Splice ends from splice site to right end */
+ if ((plusp == true && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed) ||
+ (plusp == false && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed)) {
+
+ /* pos5 was trimpos+1 */
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ plusp,genestrand,first_read_p);
+
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+ splice_pos_end = querylength - 1; /* not query_lastpos */
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
+ }
+
+ debug4e(printf("Search for splice sites from %d (%llu) down to %d (%llu)\n",
+ splice_pos_end,(unsigned long long) segment_left+splice_pos_end,
+ splice_pos_start,(unsigned long long) segment_left+splice_pos_start));
+
+ jstart = segment->splicesites_i;
+ while (jstart < nsplicesites && splicesites[jstart] < segment_left + splice_pos_start) {
+ jstart++;
+ }
+ jend = jstart;
+ while (jend < nsplicesites && splicesites[jend] <= segment_left + splice_pos_end) { /* Needs to be <= */
+ jend++;
+ }
+
+ nmismatches = 0;
+ for (j = jend - 1; j >= jstart; j--) {
+ debug4e(printf("splicesites_i #%d is at %llu\n",j,(unsigned long long) splicesites[j]));
+ splice_pos = splicesites[j] - segment_left;
+ while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+#if 0
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p));
+#endif
+ if (nmismatches > max_mismatches_allowed) {
+ debug4e(printf("nmismatches %d > max_mismatches_allowed %d\n",nmismatches,max_mismatches_allowed));
+ } else if (splicetypes[j] == ACCEPTOR) {
+ debug4e(printf("Known acceptor #%d at querypos %d\n",j,splicesites[j] - segment_left));
+ debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
+ splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s acceptor: known at %d (%d mismatches)\n",
+ plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
+ (*shortend_acceptors)[nmismatches] = List_push((*shortend_acceptors)[nmismatches],(void *) hit);
+ }
+
+ } else if (splicetypes[j] == ANTIDONOR) {
+ debug4e(printf("Known antidonor #%d at querypos %d\n",j,splicesites[j] - segment_left));
+ debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
+
+ if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,splice_pos,nmismatches,
+ /*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antidonor: known at %d (%d mismatches)\n",
+ plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
+ (*shortend_antidonors)[nmismatches] = List_push((*shortend_antidonors)[nmismatches],(void *) hit);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return;
+}
+
+
+static void /* Plus-strand pass (all genome calls use plusp=true): pushes startfrag and endfrag substrings for each anchor segment */
+find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_endfrags, /* out: arrays of lists, indexed by mismatch count */
+ List_T anchor_segments, /* segments to examine; walked with List_next/List_head */
+#ifdef DEBUG4E
+ char *queryptr, /* query text; only printed in debug output */
+#endif
+ Floors_T floors, int querylength, int query_lastpos, Compress_T query_compress, /* nothing is done when floors is NULL */
+ int max_mismatches_allowed, int genestrand, bool first_read_p) { /* hits are created only while nmismatches <= max_mismatches_allowed */
+#ifdef DEBUG4E
+ char *gbuffer; /* genomic text for debug output only */
+#endif
+ /* Each non-NULL hit is pushed onto (*distant_startfrags)[n] or (*distant_endfrags)[n], where n is its mismatch count. */
+ List_T p; /* cursor over anchor_segments */
+ Segment_T segment;
+ Substring_T hit;
+ Univcoord_T segment_left; /* genomic coordinate aligned with querypos 0 */
+ int nmismatches; /* running mismatch count for the current splice_pos */
+ int splice_pos;
+
+ int nmismatches_left, nmismatches_right;
+ int *floors_from_neg3, *floors_to_pos3; /* rows of floors->scorefrom and floors->scoreto, indexed below by segment querypos5/querypos3 */
+
+ int splice_pos_start, splice_pos_end;
+ int i; /* used only by the debug loops in this function */
+ /* mismatch_positions is scratch space filled by Genome_mismatches_left and then reused by Genome_mismatches_right. */
+#ifdef HAVE_ALLOCA
+ int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+#else
+ int mismatch_positions[MAX_READLENGTH+1]; /* presumably callers guarantee querylength <= MAX_READLENGTH -- TODO confirm */
+#endif
+
+
+ debug4e(printf("Entering find_spliceends_distant_dna with %d segments\n",nsegments)); /* NOTE(review): nsegments is not a parameter or local of this function; verify this builds with DEBUG4E */
+
+ if (floors != NULL) {
+ floors_from_neg3 = floors->scorefrom[-index1interval];
+ floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+
+ for (p = anchor_segments; p != NULL; p = List_next(p)) {
+ segment = (Segment_T) List_head(p);
+ assert(segment->diagonal != (Univcoord_T) -1); /* presumably (Univcoord_T) -1 marks an unusable diagonal -- TODO confirm */
+
+ segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
+ debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+ max_mismatches_allowed,(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
+ floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
+
+ debug4e(
+ gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
+ Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
+ printf("genome 0..: %s\n",gbuffer);
+ printf("query 0..: %s\n",queryptr);
+ FREE(gbuffer); /* NOTE(review): gbuffer is printed again further down in this iteration, after this FREE */
+ );
+
+ /* Splice ends from left to splice site */
+ if (floors_from_neg3[segment->querypos5] <= max_mismatches_allowed) { /* skip this side when the floor value already exceeds the budget */
+
+ /* pos3 was trimpos */
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ /*plusp*/true,genestrand,first_read_p);
+
+ debug4e(
+ printf("%d mismatches on left (%d allowed) at:",
+ nmismatches_left,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+ /* Candidates run from index1part up to querylength-1, capped at the last reported mismatch position when the budget was exceeded. */
+ splice_pos_start = index1part;
+ if (nmismatches_left <= max_mismatches_allowed) {
+ splice_pos_end = querylength - 1;
+ } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+ splice_pos_end = querylength - 1;
+ }
+
+ debug4e(printf("Allow all splice points from %d up to %d\n",splice_pos_start,splice_pos_end));
+ /* Count the mismatches that lie before the first candidate position. */
+ nmismatches = 0;
+ while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos_start) {
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ /* Walk rightward; per the assert below, nmismatches is the mismatch count over query positions 0..splice_pos. */
+ splice_pos = splice_pos_start;
+ while (splice_pos <= splice_pos_end && nmismatches <= max_mismatches_allowed) {
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+ /*plusp*/true,genestrand,first_read_p));
+ if ((hit = Substring_new_startfrag(/*startfrag_coord*/segment_left + splice_pos,
+ splice_pos,nmismatches,/*left*/segment_left,query_compress,
+ querylength,/*plusp*/true,genestrand,first_read_p,
+ segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> plus startfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); /* NOTE(review): gbuffer was already FREE'd earlier in this iteration */
+ (*distant_startfrags)[nmismatches] = List_push((*distant_startfrags)[nmismatches],(void *) hit);
+ }
+
+ /* use splice_pos in the above loop because splice_pos defines a right substring boundary */
+ if (splice_pos++ == mismatch_positions[nmismatches]) { /* compares the old splice_pos, then advances; presumably a sentinel entry follows the last mismatch -- TODO confirm */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ }
+ }
+
+ /* Splice ends from splice site to right end */
+ if (floors_to_pos3[segment->querypos3] <= max_mismatches_allowed) { /* skip this side when the floor value already exceeds the budget */
+
+ /* pos5 was trimpos+1 */
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ /*plusp*/true,genestrand,first_read_p);
+
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+ /* Candidates run from query_lastpos down to 1, or down to the last reported mismatch position (if >= 1) when the budget was exceeded. */
+ splice_pos_end = query_lastpos;
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
+ }
+
+ debug4e(printf("Allow all splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
+ /* Count the mismatches that lie at or beyond the first (rightmost) candidate position. */
+ nmismatches = 0;
+ while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos_end) {
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ /* Walk leftward; per the assert below, nmismatches is the mismatch count over query positions splice_pos..querylength. */
+ splice_pos = splice_pos_end;
+ while (splice_pos >= splice_pos_start && nmismatches <= max_mismatches_allowed) {
+ debug4e(printf(" splice pos %d, nmismatches (quick) %d, nmismatches (goldstd) %d\n",splice_pos,nmismatches,
+ Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ /*plusp*/true,genestrand,first_read_p)));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ /*plusp*/true,genestrand,first_read_p));
+ if ((hit = Substring_new_endfrag(/*endfrag_coord*/segment_left + splice_pos,
+ splice_pos,nmismatches,/*left*/segment_left,query_compress,
+ querylength,/*plusp*/true,genestrand,first_read_p,
+ segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> plus endfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); /* NOTE(review): gbuffer was already FREE'd earlier in this iteration */
+ (*distant_endfrags)[nmismatches] = List_push((*distant_endfrags)[nmismatches],(void *) hit);
+ }
+
+ /* use splice_pos for the next loop because splice_pos defines a left substring boundary */
+ if (--splice_pos == mismatch_positions[nmismatches]) { /* steps left first, then compares at the new splice_pos */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+
+ }
+ }
+ }
+ }
+
+ return;
+}
+
+
+static void /* Minus-strand pass (all genome calls use plusp=false): the left-to-right walk pushes endfrags, the right-to-left walk pushes startfrags */
+find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant_endfrags, /* out: arrays of lists, indexed by mismatch count */
+ List_T anchor_segments, /* segments to examine; walked with List_next/List_head */
+#ifdef DEBUG4E
+ char *queryptr, /* query text; only printed in debug output */
+#endif
+ Floors_T floors, int querylength, int query_lastpos, Compress_T query_compress, /* nothing is done when floors is NULL */
+ int max_mismatches_allowed, int genestrand, bool first_read_p) { /* hits are created only while nmismatches <= max_mismatches_allowed */
+#ifdef DEBUG4E
+ char *gbuffer; /* genomic text for debug output only */
+#endif
+
+ List_T p; /* cursor over anchor_segments */
+ Segment_T segment;
+ Substring_T hit;
+ Univcoord_T segment_left; /* genomic coordinate aligned with querypos 0 */
+ int nmismatches; /* running mismatch count for the current splice_pos */
+ int splice_pos;
+
+ int nmismatches_left, nmismatches_right;
+ int *floors_from_neg3, *floors_to_pos3; /* rows of floors->scorefrom and floors->scoreto, indexed below by segment querypos5/querypos3 */
+
+ int splice_pos_start, splice_pos_end;
+ int i; /* used only by the debug loops in this function */
+ /* mismatch_positions is scratch space filled by Genome_mismatches_left and then reused by Genome_mismatches_right. */
+#ifdef HAVE_ALLOCA
+ int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
+#else
+ int mismatch_positions[MAX_READLENGTH+1]; /* presumably callers guarantee querylength <= MAX_READLENGTH -- TODO confirm */
+#endif
+
+
+ debug4e(printf("Entering find_spliceends_distant_dna with %d segments\n",nsegments)); /* NOTE(review): nsegments is not a parameter or local of this function; verify this builds with DEBUG4E */
+
+ if (floors != NULL) {
+ floors_from_neg3 = floors->scorefrom[-index1interval];
+ floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+
+ for (p = anchor_segments; p != NULL; p = List_next(p)) {
+ segment = (Segment_T) List_head(p);
+ assert(segment->diagonal != (Univcoord_T) -1); /* presumably (Univcoord_T) -1 marks an unusable diagonal -- TODO confirm */
+
+ segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
+ debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+ max_mismatches_allowed,(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
+ floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
+
+ debug4e(
+ gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
+ Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
+ printf("genome 0..: %s\n",gbuffer);
+ printf("query 0..: %s\n",queryptr);
+ FREE(gbuffer); /* NOTE(review): gbuffer is printed again further down in this iteration, after this FREE */
+ );
- /* Splice ends from splice site to right end */
- if ((plusp == true && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed) ||
- (plusp == false && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed)) {
+ /* Splice ends from left to splice site */
+ if (floors_to_pos3[segment->querypos3] <= max_mismatches_allowed) { /* minus strand: this side is gated by floors_to_pos3, unlike the plus-strand function */
- /* pos5 was trimpos+1 */
- nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
- query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
- plusp,genestrand,first_read_p);
+ /* pos3 was trimpos */
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ /*plusp*/false,genestrand,first_read_p);
- debug4e(
- printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
- for (i = 0; i <= nmismatches_right; i++) {
- printf(" %d",mismatch_positions[i]);
- }
- printf("\n");
- );
+ debug4e(
+ printf("%d mismatches on left (%d allowed) at:",
+ nmismatches_left,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
- splice_pos_end = querylength - 1; /* not query_lastpos */
- if (nmismatches_right <= max_mismatches_allowed) {
- splice_pos_start = 1;
- } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
- splice_pos_start = 1;
- }
+ splice_pos_start = index1part; /* candidates run from index1part up to querylength-1, capped at the last reported mismatch when the budget was exceeded */
+ if (nmismatches_left <= max_mismatches_allowed) {
+ splice_pos_end = querylength - 1;
+ } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+ splice_pos_end = querylength - 1;
+ }
- debug4e(printf("Search for splice sites from %d (%llu) down to %d (%llu)\n",
- splice_pos_end,(unsigned long long) segment_left+splice_pos_end,
- splice_pos_start,(unsigned long long) segment_left+splice_pos_start));
+ debug4e(printf("Allow all splice points from %d up to %d\n",splice_pos_start,splice_pos_end));
- jstart = segment->splicesites_i;
- while (jstart < nsplicesites && splicesites[jstart] < segment_left + splice_pos_start) {
- jstart++;
+ nmismatches = 0; /* count the mismatches that lie before the first candidate position */
+ while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos_start) {
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ /* Walk rightward; per the assert below, nmismatches is the mismatch count over query positions 0..splice_pos. */
+ splice_pos = splice_pos_start;
+ while (splice_pos <= splice_pos_end && nmismatches <= max_mismatches_allowed) {
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+ /*plusp*/false,genestrand,first_read_p));
+ if ((hit = Substring_new_endfrag(/*endfrag_coord*/segment_left + splice_pos,
+ splice_pos,nmismatches,/*left*/segment_left,query_compress,
+ querylength,/*plusp*/false,genestrand,first_read_p,
+ segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> minus endfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); /* NOTE(review): gbuffer was already FREE'd earlier in this iteration */
+ (*distant_endfrags)[nmismatches] = List_push((*distant_endfrags)[nmismatches],(void *) hit);
}
- jend = jstart;
- while (jend < nsplicesites && splicesites[jend] <= segment_left + splice_pos_end) { /* Needs to be <= */
- jend++;
+
+ /* use splice_pos in the above loop because splice_pos defines a right substring boundary */
+ if (splice_pos++ == mismatch_positions[nmismatches]) { /* compares the old splice_pos, then advances; presumably a sentinel entry follows the last mismatch -- TODO confirm */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
}
+ }
+ }
- nmismatches = 0;
- for (j = jend - 1; j >= jstart; j--) {
- debug4e(printf("splicesites_i #%d is at %llu\n",j,(unsigned long long) splicesites[j]));
- splice_pos = splicesites[j] - segment_left;
- while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
- debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
- nmismatches++;
- }
-#if 0
- assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
- plusp,genestrand,first_read_p));
-#endif
- if (nmismatches > max_mismatches_allowed) {
- debug4e(printf("nmismatches %d > max_mismatches_allowed %d\n",nmismatches,max_mismatches_allowed));
- } else if (splicetypes[j] == ACCEPTOR) {
- debug4e(printf("Known acceptor #%d at querypos %d\n",j,splicesites[j] - segment_left));
- debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- sensep = (plusp == true) ? true : false;
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/splicesites[j],/*acceptor_knowni*/j,
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s acceptor: known at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
- (*shortend_acceptors)[nmismatches] = List_push((*shortend_acceptors)[nmismatches],(void *) hit);
- }
+ /* Splice ends from splice site to right end */
+ if (floors_from_neg3[segment->querypos5] <= max_mismatches_allowed) { /* minus strand: this side is gated by floors_from_neg3, unlike the plus-strand function */
- } else if (splicetypes[j] == ANTIDONOR) {
- debug4e(printf("Known antidonor #%d at querypos %d\n",j,splicesites[j] - segment_left));
- debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- sensep = (plusp == true) ? false : true;
- if ((hit = Substring_new_donor(/*donor_coord*/splicesites[j],/*donor_knowni*/j,splice_pos,nmismatches,
- /*prob*/2.0,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
+ /* pos5 was trimpos+1 */
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ /*plusp*/false,genestrand,first_read_p);
+
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+ /* Candidates run from query_lastpos down to 1, or down to the last reported mismatch position (if >= 1) when the budget was exceeded. */
+ splice_pos_end = query_lastpos;
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
+ }
+
+ debug4e(printf("Allow all splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
+ /* Count the mismatches that lie at or beyond the first (rightmost) candidate position. */
+ nmismatches = 0;
+ while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos_end) {
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ /* Walk leftward; per the assert below, nmismatches is the mismatch count over query positions splice_pos..querylength. */
+ splice_pos = splice_pos_end;
+ while (splice_pos >= splice_pos_start && nmismatches <= max_mismatches_allowed) {
+ debug4e(printf(" splice pos %d, nmismatches (quick) %d, nmismatches (goldstd) %d\n",splice_pos,nmismatches,
+ Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ /*plusp*/false,genestrand,first_read_p)));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ /*plusp*/false,genestrand,first_read_p));
+ if ((hit = Substring_new_startfrag(/*startfrag_coord*/segment_left + splice_pos,
+ splice_pos,nmismatches,/*left*/segment_left,query_compress,
+ querylength,/*plusp*/false,genestrand,first_read_p,
+ segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s antidonor: known at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Substring_chimera_pos(hit),nmismatches));
- (*shortend_antidonors)[nmismatches] = List_push((*shortend_antidonors)[nmismatches],(void *) hit);
- }
- }
+ debug4e(printf("=> minus startfrag: at %d (%d mismatches)\n",Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer)); /* NOTE(review): gbuffer was already FREE'd earlier in this iteration */
+ (*distant_startfrags)[nmismatches] = List_push((*distant_startfrags)[nmismatches],(void *) hit);
+ }
+
+ /* use splice_pos for the next loop because splice_pos defines a left substring boundary */
+ if (--splice_pos == mismatch_positions[nmismatches]) { /* steps left first, then compares at the new splice_pos */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
}
}
}
}
}
-
+
return;
}
+/* Produces lists of distant_donors and distant_acceptors that are substrings */
+/* TODO: Change to lists of Stage3end_T objects, including GMAP.
+ Change definition of a chimera to be two Stage3end_T objects, instead
+ of two substrings. */
static void
-find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
- List_T **distant_acceptors, List_T **distant_antiacceptors,
- struct Segment_T *segments, int nsegments,
+find_spliceends_distant_rna (List_T **distant_donors, List_T **distant_antidonors,
+ List_T **distant_acceptors, List_T **distant_antiacceptors,
+ List_T anchor_segments,
#ifdef DEBUG4E
- char *queryptr,
+ char *queryptr,
#endif
- Floors_T floors, int querylength, int query_lastpos, Compress_T query_compress,
- int max_mismatches_allowed, bool plusp, int genestrand, bool first_read_p) {
+ Floors_T floors, int querylength, int query_lastpos, Compress_T query_compress,
+ int max_mismatches_allowed, bool plusp, int genestrand, bool first_read_p) {
#ifdef DEBUG4E
char *gbuffer;
#endif
+ List_T p;
Segment_T segment;
Substring_T hit;
Univcoord_T segment_left;
@@ -9558,7 +9746,7 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
int nmismatches_left, nmismatches_right;
int *floors_from_neg3, *floors_to_pos3;
- bool sensep;
+ int sensedir;
int splice_pos_start, splice_pos_end;
@@ -9590,427 +9778,430 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
int *donori_knowni, *acceptorj_knowni, *antiacceptori_knowni, *antidonorj_knowni;
- debug4e(printf("Entering find_spliceends_distant with %d segments\n",nsegments));
+ debug4e(printf("Entering find_spliceends_distant_rna with %d segments\n",nsegments));
- if (nsegments > 0) {
+ if (floors != NULL) {
floors_from_neg3 = floors->scorefrom[-index1interval];
floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
- for (segment = segments; segment < &(segments[nsegments]); segment++) {
- if (segment->diagonal < (Univcoord_T) -1) {
+ for (p = anchor_segments; p != NULL; p = List_next(p)) {
+ segment = (Segment_T) List_head(p);
+ assert(segment->diagonal != (Univcoord_T) -1);
+
+ segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
+ debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
+ max_mismatches_allowed,(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
+ floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
+
+ debug4e(
+ gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
+ Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
+ printf("genome 0..: %s\n",gbuffer);
+ printf("query 0..: %s\n",queryptr);
+ FREE(gbuffer);
+ );
- segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- debug4e(printf("find_spliceends: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu, floors %d and %d\n",
- max_mismatches_allowed,(unsigned long long) segment->diagonal,
- segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left,
- floors_from_neg3[segment->querypos5],floors_to_pos3[segment->querypos3]));
+ /* Splice ends from left to splice site */
+ if ((plusp == true && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed) ||
+ (plusp == false && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed)) {
+
+ /* pos3 was trimpos */
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ plusp,genestrand,first_read_p);
debug4e(
- gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
- Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
- printf("genome 0..: %s\n",gbuffer);
- printf("query 0..: %s\n",queryptr);
- FREE(gbuffer);
+ printf("%d mismatches on left (%d allowed) at:",
+ nmismatches_left,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
);
- /* Splice ends from left to splice site */
- if ((plusp == true && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed) ||
- (plusp == false && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed)) {
-
- /* pos3 was trimpos */
- nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
- query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
- plusp,genestrand,first_read_p);
-
- debug4e(
- printf("%d mismatches on left (%d allowed) at:",
- nmismatches_left,max_mismatches_allowed);
- for (i = 0; i <= nmismatches_left; i++) {
- printf(" %d",mismatch_positions[i]);
- }
- printf("\n");
- );
-
- splice_pos_start = index1part;
- if (nmismatches_left <= max_mismatches_allowed) {
- splice_pos_end = querylength - 1;
- } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
- splice_pos_end = querylength - 1;
- }
+ splice_pos_start = index1part;
+ if (nmismatches_left <= max_mismatches_allowed) {
+ splice_pos_end = querylength - 1;
+ } else if ((splice_pos_end = mismatch_positions[nmismatches_left-1]) > querylength - 1) {
+ splice_pos_end = querylength - 1;
+ }
- if (splice_pos_start <= splice_pos_end) {
- debug4e(printf("Search for splice sites from %d up to %d\n",splice_pos_start,splice_pos_end));
-
- segment_donor_nknown = 0;
- segment_antiacceptor_nknown = 0;
- if ((j = segment->splicesites_i) >= 0) {
- /* Known splicing */
- /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segment */
- while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
- if (splicetypes[j] == DONOR) {
- segment_donor_knownpos[segment_donor_nknown] = splicesites[j] - segment_left;
- segment_donor_knowni[segment_donor_nknown++] = j;
- } else if (splicetypes[j] == ANTIACCEPTOR) {
- segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = splicesites[j] - segment_left;
- segment_antiacceptor_knowni[segment_antiacceptor_nknown++] = j;
- }
- j++;
+ if (splice_pos_start <= splice_pos_end) {
+ debug4e(printf("Search for splice sites from %d up to %d\n",splice_pos_start,splice_pos_end));
+
+ segment_donor_nknown = 0;
+ segment_antiacceptor_nknown = 0;
+ if ((j = segment->splicesites_i) >= 0) {
+ /* Known splicing */
+ /* Ends 1 (donor, plus) and 8 (antiacceptor, plus): mark known splice sites in segment */
+ while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+ if (splicetypes[j] == DONOR) {
+ segment_donor_knownpos[segment_donor_nknown] = splicesites[j] - segment_left;
+ segment_donor_knowni[segment_donor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIACCEPTOR) {
+ segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = splicesites[j] - segment_left;
+ segment_antiacceptor_knowni[segment_antiacceptor_nknown++] = j;
}
+ j++;
}
- segment_donor_knownpos[segment_donor_nknown] = querylength;
- segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = querylength;
-
- /* Originally on plus strand. No complement */
- sensep = (plusp == true) ? true : false;
- if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
- donori_nsites = Genome_donor_positions(positions_alloc,knowni_alloc,
- segment_donor_knownpos,segment_donor_knowni,
- segment_left,splice_pos_start,splice_pos_end+1);
- donori_positions = positions_alloc;
- donori_knowni = knowni_alloc;
- debug4e(
- printf("Donor dinucleotides:");
- for (i = 0; i < donori_nsites; i++) {
- printf(" %d",donori_positions[i]);
- }
- printf("\n");
- );
- } else {
- donori_nsites = segment_donor_nknown;
- donori_positions = segment_donor_knownpos;
- donori_knowni = segment_donor_knowni;
- }
+ }
+ segment_donor_knownpos[segment_donor_nknown] = querylength;
+ segment_antiacceptor_knownpos[segment_antiacceptor_nknown] = querylength;
+
+ /* Originally on plus strand. No complement */
+ sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+ if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_LEFT_MARGIN) {
+ donori_nsites = Genome_donor_positions(positions_alloc,knowni_alloc,
+ segment_donor_knownpos,segment_donor_knowni,
+ segment_left,splice_pos_start,splice_pos_end+1);
+ donori_positions = positions_alloc;
+ donori_knowni = knowni_alloc;
+ debug4e(
+ printf("Donor dinucleotides:");
+ for (i = 0; i < donori_nsites; i++) {
+ printf(" %d",donori_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ donori_nsites = segment_donor_nknown;
+ donori_positions = segment_donor_knownpos;
+ donori_knowni = segment_donor_knowni;
+ }
- i = 0;
- nmismatches = 0;
- while (i < donori_nsites && nmismatches <= max_mismatches_allowed) {
- splice_pos = donori_positions[i];
- while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
- debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
- nmismatches++;
- }
- debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ i = 0;
+ nmismatches = 0;
+ while (i < donori_nsites && nmismatches <= max_mismatches_allowed) {
+ splice_pos = donori_positions[i];
+ while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
#if 0
- assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
- plusp,genestrand,first_read_p));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p));
#endif
- if (nmismatches <= max_mismatches_allowed) {
- if (donori_knowni[i] >= 0) {
- debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+ if (nmismatches <= max_mismatches_allowed) {
+ if (donori_knowni[i] >= 0) {
+ debug4e(printf("Known donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
- if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/donori_knowni[i],
+ splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s donor: %f at %d (%d mismatches)\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+ splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
+ sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s donor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches));
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
}
-
- } else {
- prob = Maxent_hr_donor_prob(segment_left + splice_pos,segment->chroffset);
- debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
- splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
- if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel donor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
- if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
- splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s donor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
- debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
- (*distant_donors)[nmismatches] = List_push((*distant_donors)[nmismatches],(void *) hit);
- }
- }
}
}
-
- i++;
}
+ i++;
+ }
+
- /* Splicing originally on minus strand. Complement */
- sensep = (plusp == true) ? false : true;
- if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
- antiacceptori_nsites = Genome_antiacceptor_positions(positions_alloc,knowni_alloc,
- segment_antiacceptor_knownpos,segment_antiacceptor_knowni,
- segment_left,splice_pos_start,splice_pos_end+1);
- antiacceptori_positions = positions_alloc;
- antiacceptori_knowni = knowni_alloc;
- debug4e(
- printf("Antiacceptor dinucleotides:");
- for (i = 0; i < antiacceptori_nsites; i++) {
- printf(" %d",antiacceptori_positions[i]);
- }
- printf("\n");
- );
- } else {
- antiacceptori_nsites = segment_antiacceptor_nknown;
- antiacceptori_positions = segment_antiacceptor_knownpos;
- antiacceptori_knowni = segment_antiacceptor_knowni;
- }
+ /* Splicing originally on minus strand. Complement */
+ sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
- i = 0;
- nmismatches = 0;
- while (i < antiacceptori_nsites && nmismatches <= max_mismatches_allowed) {
- splice_pos = antiacceptori_positions[i];
- while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
- debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
- nmismatches++;
- }
- debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_RIGHT_MARGIN) {
+ antiacceptori_nsites = Genome_antiacceptor_positions(positions_alloc,knowni_alloc,
+ segment_antiacceptor_knownpos,segment_antiacceptor_knowni,
+ segment_left,splice_pos_start,splice_pos_end+1);
+ antiacceptori_positions = positions_alloc;
+ antiacceptori_knowni = knowni_alloc;
+ debug4e(
+ printf("Antiacceptor dinucleotides:");
+ for (i = 0; i < antiacceptori_nsites; i++) {
+ printf(" %d",antiacceptori_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ antiacceptori_nsites = segment_antiacceptor_nknown;
+ antiacceptori_positions = segment_antiacceptor_knownpos;
+ antiacceptori_knowni = segment_antiacceptor_knowni;
+ }
+
+ i = 0;
+ nmismatches = 0;
+ while (i < antiacceptori_nsites && nmismatches <= max_mismatches_allowed) {
+ splice_pos = antiacceptori_positions[i];
+ while (nmismatches < nmismatches_left && mismatch_positions[nmismatches] < splice_pos) { /* Changed from <= to < */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
#if 0
- assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
- plusp,genestrand,first_read_p));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/0,/*pos3*/splice_pos,
+ plusp,genestrand,first_read_p));
#endif
- if (nmismatches <= max_mismatches_allowed) {
- if (antiacceptori_knowni[i] >= 0) {
- debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ if (nmismatches <= max_mismatches_allowed) {
+ if (antiacceptori_knowni[i] >= 0) {
+ debug4e(printf("Known antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
+ splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches)\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/antiacceptori_knowni[i],
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+ splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
+ sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches));
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
}
-
- } else {
- prob = Maxent_hr_antiacceptor_prob(segment_left + splice_pos,segment->chroffset);
- debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
- splice_pos,nmismatches,prob,sufficient_splice_prob_distant(splice_pos,nmismatches,prob)));
- if (sufficient_splice_prob_distant(/*support*/splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel antiacceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_end));
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
- splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s antiacceptor : %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
- debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
- (*distant_antiacceptors)[nmismatches] = List_push((*distant_antiacceptors)[nmismatches],(void *) hit);
- }
- }
}
}
-
- i++;
}
- }
+ i++;
+ }
}
- /* Splice ends from splice site to right end */
- if ((plusp == true && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed) ||
- (plusp == false && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed)) {
-
- /* pos5 was trimpos+1 */
- nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
- query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
- plusp,genestrand,first_read_p);
+ }
- debug4e(
- printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
- for (i = 0; i <= nmismatches_right; i++) {
- printf(" %d",mismatch_positions[i]);
- }
- printf("\n");
- );
+ /* Splice ends from splice site to right end */
+ if ((plusp == true && floors_to_pos3[segment->querypos3] <= max_mismatches_allowed) ||
+ (plusp == false && floors_from_neg3[segment->querypos5] <= max_mismatches_allowed)) {
- splice_pos_end = query_lastpos;
- if (nmismatches_right <= max_mismatches_allowed) {
- splice_pos_start = 1;
- } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
- splice_pos_start = 1;
- }
+ /* pos5 was trimpos+1 */
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ query_compress,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ plusp,genestrand,first_read_p);
- if (splice_pos_start <= splice_pos_end) {
- debug4e(printf("Search for splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
-
- segment_acceptor_nknown = 0;
- segment_antidonor_nknown = 0;
- if ((j = segment->splicesites_i) >= 0) {
- /* Known splicing */
- while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
- if (splicetypes[j] == ACCEPTOR) {
- debug4k(printf("Setting known acceptor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
- segment_acceptor_knownpos[segment_acceptor_nknown] = splicesites[j] - segment_left;
- segment_acceptor_knowni[segment_acceptor_nknown++] = j;
- } else if (splicetypes[j] == ANTIDONOR) {
- debug4k(printf("Setting known antidonor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
- segment_antidonor_knownpos[segment_antidonor_nknown] = splicesites[j] - segment_left;
- segment_antidonor_knowni[segment_antidonor_nknown++] = j;
+ debug4e(
+ printf("%d mismatches on right (%d allowed) at:",nmismatches_right,max_mismatches_allowed);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
}
- j++;
+ printf("\n");
+ );
+
+ splice_pos_end = query_lastpos;
+ if (nmismatches_right <= max_mismatches_allowed) {
+ splice_pos_start = 1;
+ } else if ((splice_pos_start = mismatch_positions[nmismatches_right-1]) < 1) {
+ splice_pos_start = 1;
+ }
+
+ if (splice_pos_start <= splice_pos_end) {
+ debug4e(printf("Search for splice sites from %d down to %d\n",splice_pos_end,splice_pos_start));
+
+ segment_acceptor_nknown = 0;
+ segment_antidonor_nknown = 0;
+ if ((j = segment->splicesites_i) >= 0) {
+ /* Known splicing */
+ while (j < nsplicesites && splicesites[j] <= segment_left + splice_pos_end) { /* Needs to be <= */
+ if (splicetypes[j] == ACCEPTOR) {
+ debug4k(printf("Setting known acceptor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+ segment_acceptor_knownpos[segment_acceptor_nknown] = splicesites[j] - segment_left;
+ segment_acceptor_knowni[segment_acceptor_nknown++] = j;
+ } else if (splicetypes[j] == ANTIDONOR) {
+ debug4k(printf("Setting known antidonor %d for segment at %llu\n",j,(unsigned long long) splicesites[j]));
+ segment_antidonor_knownpos[segment_antidonor_nknown] = splicesites[j] - segment_left;
+ segment_antidonor_knowni[segment_antidonor_nknown++] = j;
}
+ j++;
}
- segment_acceptor_knownpos[segment_acceptor_nknown] = querylength;
- segment_antidonor_knownpos[segment_antidonor_nknown] = querylength;
-
+ }
+ segment_acceptor_knownpos[segment_acceptor_nknown] = querylength;
+ segment_antidonor_knownpos[segment_antidonor_nknown] = querylength;
+
+
+ /* Splicing originally on plus strand. No complement. */
+ sensedir = (plusp == true) ? SENSE_FORWARD : SENSE_ANTI;
+
+ if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
+ acceptorj_nsites = Genome_acceptor_positions(positions_alloc,knowni_alloc,
+ segment_acceptor_knownpos,segment_acceptor_knowni,
+ segment_left,splice_pos_start,splice_pos_end+1);
+ acceptorj_positions = positions_alloc;
+ acceptorj_knowni = knowni_alloc;
+ debug4e(
+ printf("Acceptor dinucleotides:");
+ for (i = 0; i < acceptorj_nsites; i++) {
+ printf(" %d",acceptorj_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ acceptorj_nsites = segment_acceptor_nknown;
+ acceptorj_positions = segment_acceptor_knownpos;
+ acceptorj_knowni = segment_acceptor_knowni;
+ }
- /* Splicing originally on plus strand. No complement. */
- sensep = (plusp == true) ? true : false;
- if (novelsplicingp && segment_left + splice_pos_start >= ACCEPTOR_MODEL_LEFT_MARGIN) {
- acceptorj_nsites = Genome_acceptor_positions(positions_alloc,knowni_alloc,
- segment_acceptor_knownpos,segment_acceptor_knowni,
- segment_left,splice_pos_start,splice_pos_end+1);
- acceptorj_positions = positions_alloc;
- acceptorj_knowni = knowni_alloc;
- debug4e(
- printf("Acceptor dinucleotides:");
- for (i = 0; i < acceptorj_nsites; i++) {
- printf(" %d",acceptorj_positions[i]);
- }
- printf("\n");
- );
- } else {
- acceptorj_nsites = segment_acceptor_nknown;
- acceptorj_positions = segment_acceptor_knownpos;
- acceptorj_knowni = segment_acceptor_knowni;
+ i = acceptorj_nsites - 1;
+ nmismatches = 0;
+ while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+ splice_pos = acceptorj_positions[i];
+ while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
}
-
- i = acceptorj_nsites - 1;
- nmismatches = 0;
- while (i >= 0 && nmismatches <= max_mismatches_allowed) {
- splice_pos = acceptorj_positions[i];
- while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
- debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
- nmismatches++;
- }
- debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
#if 0
- assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
- plusp,genestrand,first_read_p));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p));
#endif
- if (nmismatches <= max_mismatches_allowed) {
- if (acceptorj_knowni[i] >= 0) {
- debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ if (nmismatches <= max_mismatches_allowed) {
+ if (acceptorj_knowni[i] >= 0) {
+ debug4e(printf("Known acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
+ splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s acceptor: %f at %d (%d mismatches)\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/acceptorj_knowni[i],
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
+ splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
+ sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s acceptor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches));
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
}
-
- } else {
- prob = Maxent_hr_acceptor_prob(segment_left + splice_pos,segment->chroffset);
- debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
- splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
- if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel acceptor for segment at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- if ((hit = Substring_new_acceptor(/*acceptor_coord*/segment_left + splice_pos,/*acceptor_knowni*/-1,
- splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s acceptor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
- debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
- (*distant_acceptors)[nmismatches] = List_push((*distant_acceptors)[nmismatches],(void *) hit);
- }
- }
}
}
-
- i--;
}
+
+ i--;
+ }
- /* Splicing originally on minus strand. Complement. */
- sensep = (plusp == true) ? false : true;
- if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
- antidonorj_nsites = Genome_antidonor_positions(positions_alloc,knowni_alloc,
- segment_antidonor_knownpos,segment_antidonor_knowni,
- segment_left,splice_pos_start,splice_pos_end+1);
- antidonorj_positions = positions_alloc;
- antidonorj_knowni = knowni_alloc;
- debug4e(
- printf("Antidonor dinucleotides:");
- for (i = 0; i < antidonorj_nsites; i++) {
- printf(" %d",antidonorj_positions[i]);
- }
- printf("\n");
- );
- } else {
- antidonorj_nsites = segment_antidonor_nknown;
- antidonorj_positions = segment_antidonor_knownpos;
- antidonorj_knowni = segment_antidonor_knowni;
- }
+ /* Splicing originally on minus strand. Complement. */
+ sensedir = (plusp == true) ? SENSE_ANTI : SENSE_FORWARD;
- i = antidonorj_nsites - 1;
- nmismatches = 0;
- while (i >= 0 && nmismatches <= max_mismatches_allowed) {
- splice_pos = antidonorj_positions[i];
- while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
- debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
- nmismatches++;
- }
- debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
+ if (novelsplicingp && segment_left + splice_pos_start >= DONOR_MODEL_RIGHT_MARGIN) {
+ antidonorj_nsites = Genome_antidonor_positions(positions_alloc,knowni_alloc,
+ segment_antidonor_knownpos,segment_antidonor_knowni,
+ segment_left,splice_pos_start,splice_pos_end+1);
+ antidonorj_positions = positions_alloc;
+ antidonorj_knowni = knowni_alloc;
+ debug4e(
+ printf("Antidonor dinucleotides:");
+ for (i = 0; i < antidonorj_nsites; i++) {
+ printf(" %d",antidonorj_positions[i]);
+ }
+ printf("\n");
+ );
+ } else {
+ antidonorj_nsites = segment_antidonor_nknown;
+ antidonorj_positions = segment_antidonor_knownpos;
+ antidonorj_knowni = segment_antidonor_knowni;
+ }
+
+ i = antidonorj_nsites - 1;
+ nmismatches = 0;
+ while (i >= 0 && nmismatches <= max_mismatches_allowed) {
+ splice_pos = antidonorj_positions[i];
+ while (nmismatches < nmismatches_right && mismatch_positions[nmismatches] >= splice_pos) { /* Must be >= */
+ debug4e(printf(" mismatch at %d\n",mismatch_positions[nmismatches]));
+ nmismatches++;
+ }
+ debug4e(printf(" splice pos %d, nmismatches %d\n",splice_pos,nmismatches));
#if 0
- assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
- plusp,genestrand,first_read_p));
+ assert(nmismatches == Genome_count_mismatches_substring(query_compress,segment_left,/*pos5*/splice_pos,/*pos3*/querylength,
+ plusp,genestrand,first_read_p));
#endif
- if (nmismatches <= max_mismatches_allowed) {
- if (antidonorj_knowni[i] >= 0) {
- debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ if (nmismatches <= max_mismatches_allowed) {
+ if (antidonorj_knowni[i] >= 0) {
+ debug4e(printf("Known antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
+ (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
+ splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ querylength,plusp,genestrand,first_read_p,
+ sensedir,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength)) != NULL) {
+ debug4e(printf("=> %s antidonor: %f at %d (%d mismatches)\n",
+ plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
+ Substring_chimera_pos(hit),nmismatches));
+ debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
+ (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
+ }
+
+ } else {
+ prob = Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset);
+ debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
+ splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
+ if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
+ debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
(unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/antidonorj_knowni[i],
- splice_pos,nmismatches,/*prob*/2.0,/*left*/segment_left,query_compress,
+ if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
+ splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
+ sensedir,segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength)) != NULL) {
debug4e(printf("=> %s antidonor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset),
- Substring_chimera_pos(hit),nmismatches));
+ plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
(*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
}
-
- } else {
- prob = Maxent_hr_antidonor_prob(segment_left + splice_pos,segment->chroffset);
- debug4e(printf("splice pos %d, nmismatches %d, prob %f, sufficient %d\n",
- splice_pos,nmismatches,prob,sufficient_splice_prob_distant(querylength - splice_pos,nmismatches,prob)));
- if (sufficient_splice_prob_distant(/*support*/querylength - splice_pos,nmismatches,prob)) {
- debug4e(printf("Novel antidonor for segmenti at %llu, splice_pos %d (%d mismatches), stopi = %d\n",
- (unsigned long long) segment_left,splice_pos,nmismatches,splice_pos_start));
- if ((hit = Substring_new_donor(/*donor_coord*/segment_left + splice_pos,/*donor_knowni*/-1,
- splice_pos,nmismatches,prob,/*left*/segment_left,query_compress,
- querylength,plusp,genestrand,first_read_p,
- sensep,segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength)) != NULL) {
- debug4e(printf("=> %s antidonor: %f at %d (%d mismatches)\n",
- plusp == true ? "plus" : "minus",prob,Substring_chimera_pos(hit),nmismatches));
- debug4e(printf("q: %s\ng: %s\n",queryptr,gbuffer));
- (*distant_antidonors)[nmismatches] = List_push((*distant_antidonors)[nmismatches],(void *) hit);
- }
- }
}
}
-
- i--;
}
- }
+ i--;
+ }
}
}
}
@@ -10024,12 +10215,8 @@ find_spliceends_distant (List_T **distant_donors, List_T **distant_antidonors,
/* Integrates terminals found from ends by counting mismatches, and
those where querypos3 - querypos5 is long enough */
static List_T
-find_terminals (struct Segment_T *plus_segments, int plus_nsegments,
- struct Segment_T *minus_segments, int minus_nsegments,
-#ifdef DEBUG4T
- char *queryuc_ptr, /* for debugging */ char *queryrc,
-#endif
- Floors_T floors, int querylength, int query_lastpos,
+find_terminals (List_T plus_anchor_segments, List_T minus_anchor_segments,
+ int querylength, int query_lastpos,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
int max_mismatches_allowed, int genestrand, bool first_read_p) {
#ifdef DEBUG4T
@@ -10049,7 +10236,7 @@ find_terminals (struct Segment_T *plus_segments, int plus_nsegments,
int mismatch_positions[MAX_READLENGTH+1];
#endif
- int *floors_from_neg3, *floors_to_pos3;
+ /* int *floors_from_neg3, *floors_to_pos3; */
int max_terminal_length;
int nterminals_left, nterminals_right, nterminals_middle;
@@ -10059,451 +10246,442 @@ find_terminals (struct Segment_T *plus_segments, int plus_nsegments,
debug(printf("identify_terminals: Checking up to %d mismatches\n",max_mismatches_allowed));
- if (floors == NULL) {
- return (List_T) NULL;
+ /* floors_from_neg3 = floors->scorefrom[-index1interval]; */
+ /* floors_to_pos3 = floors->scoreto[query_lastpos+index1interval]; */
+
+ /* Needs to be /3 for long_terminals and short_terminals to work */
+ max_terminal_length = querylength/3;
+ if (max_terminal_length < index1part) {
+ max_terminal_length = index1part;
+ }
+
+ nterminals_left = nterminals_right = nterminals_middle = 0;
+ for (p = plus_anchor_segments; p != NULL && (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS);
+ p = List_next(p)) {
+ segment = (Segment_T) List_head(p);
+ if (0 && segment->usedp == true) {
+ /* Previously skipped, but looks like a bad idea */
+ } else if (segment->diagonal < (Univcoord_T) -1) {
+ debug4t(printf("plus: %llu, %d..%d\n",(unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3));
+ segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
+ debug4t(printf("identify_terminals_plus: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu\n",
+ max_mismatches_allowed,(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left));
+ debug4t(
+ gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
+ Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
+ printf("genome 0..: %s\n",gbuffer);
+ /* printf("query 0..: %s\n",queryuc_ptr); */
+ FREE(gbuffer);
+ );
- } else {
- floors_from_neg3 = floors->scorefrom[-index1interval];
- floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+#ifdef ALLOW_MIDDLE_ALIGNMENTS
+ if (segment->querypos3 - segment->querypos5 > max_terminal_length /* was index1part */) {
+ /* Check for middle section */
+ debug4t(printf(" => ? Middle alignment based on querypos3 %d - querypos5 %d > max_terminal_length %d",
+ segment->querypos3,segment->querypos5,max_terminal_length));
+ if (nterminals_middle >= MAX_NTERMINALS) {
+ /* Skip */
+ debug4t(printf(" => Skipping because too many nterminals_middle"));
+ } else {
+ start_endtype = (segment->querypos5 < index1interval) ? END : TERM;
+ end_endtype = (segment->querypos3 >= query_lastpos - index1interval) ? END : TERM;
+ debug4t(printf(" querypos3 %d vs index1interval %d => start_endtype %s\n",
+ segment->querypos3,index1interval,Endtype_string(start_endtype)));
+ debug4t(printf(" querypos5 %d vs query_lastpos %d - index1interval %d => end_endtype %s\n",
+ segment->querypos5,query_lastpos,index1interval,Endtype_string(end_endtype)));
+
+ if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*//*truncate_pos_left*/querylength,
+ /*left*/segment_left,query_compress_fwd,
+ querylength,/*plusp*/true,genestrand,first_read_p,
+ start_endtype,end_endtype,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength,max_mismatches_allowed,
+ /*sarrayp*/false)) != NULL) {
+ debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
+ plus_terminals_middle = List_push(plus_terminals_middle,(void *) hit);
+ nterminals_middle += 1;
+ } else {
+ debug4t(printf(" => no"));
+ }
+ }
+ debug4t(printf("\n"));
+
+ } else {
+#endif
+
+ if (nterminals_left >= MAX_NTERMINALS) {
+ /* Skip */
+ } else if (segment->floor_left > max_mismatches_allowed) {
+ debug4t(printf("Not checking left because floor_left %d > max_mismatches_allowed %d\n",
+ segment->floor_left,max_mismatches_allowed));
+ } else {
+ /* Check from left */
+ debug4t(printf("Checking left because floor_left %d <= max_mismatches_allowed %d\n",
+ segment->floor_left,max_mismatches_allowed));
- /* Needs to be /3 for long_terminals and short_terminals to work */
- max_terminal_length = querylength/3;
- if (max_terminal_length < index1part) {
- max_terminal_length = index1part;
- }
- }
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ query_compress_fwd,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ /*plusp*/true,genestrand,first_read_p);
+
+ debug4t(
+ printf("%d mismatches on left at:",nmismatches_left);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
- if (plus_nsegments > 0) {
- nterminals_left = nterminals_right = nterminals_middle = 0;
- for (segment = plus_segments; (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS) &&
- segment < &(plus_segments[plus_nsegments]); segment++) {
- if (0 && segment->usedp == true) {
- /* Previously skipped, but looks like a bad idea */
- } else if (segment->diagonal < (Univcoord_T) -1) {
- debug4t(printf("plus: %llu, %d..%d\n",(unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3));
- segment_left = segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
- debug4t(printf("identify_terminals_plus: Checking up to %d mismatches at diagonal %llu (querypos %d..%d) - querylength %d = %llu\n",
- max_mismatches_allowed,(unsigned long long) segment->diagonal,
- segment->querypos5,segment->querypos3,querylength,(unsigned long long) segment_left));
- debug4t(
- gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
- Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
- printf("genome 0..: %s\n",gbuffer);
- printf("query 0..: %s\n",queryuc_ptr);
- FREE(gbuffer);
- );
+ if (nmismatches_left == 0 || nmismatches_left <= max_mismatches_allowed ||
+ mismatch_positions[nmismatches_left-1] > querylength - max_terminal_length) {
+ debug4t(printf(" => Long terminal at left: nmismatches_left %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
+ nmismatches_left,max_mismatches_allowed,mismatch_positions[nmismatches_left-1],querylength - max_terminal_length));
+ if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*//*truncate_pos_left*/querylength,
+ /*left*/segment_left,query_compress_fwd,
+ querylength,/*plusp*/true,genestrand,first_read_p,
+ /*start_endtype*/END,/*end_endtype*/TERM,
+ segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength,max_mismatches_allowed,
+ /*sarrayp*/false)) != NULL) {
+ debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
+ plus_terminals_left = List_push(plus_terminals_left,(void *) hit);
+ nterminals_left += 1;
+ } else {
+ debug4t(printf(" => no"));
+ }
+ debug4t(printf("\n"));
-#ifdef ALLOW_MIDDLE_ALIGNMENTS
- if (segment->querypos3 - segment->querypos5 > max_terminal_length /* was index1part */) {
- /* Check for middle section */
- debug4t(printf(" => ? Middle alignment based on querypos3 %d - querypos5 %d > max_terminal_length %d",
- segment->querypos3,segment->querypos5,max_terminal_length));
- if (nterminals_middle >= MAX_NTERMINALS) {
- /* Skip */
- debug4t(printf(" => Skipping because too many nterminals_middle"));
- } else {
- start_endtype = (segment->querypos5 < index1interval) ? END : TERM;
- end_endtype = (segment->querypos3 >= query_lastpos - index1interval) ? END : TERM;
- debug4t(printf(" querypos3 %d vs index1interval %d => start_endtype %s\n",
- segment->querypos3,index1interval,Endtype_string(start_endtype)));
- debug4t(printf(" querypos5 %d vs query_lastpos %d - index1interval %d => end_endtype %s\n",
- segment->querypos5,query_lastpos,index1interval,Endtype_string(end_endtype)));
+ } else if (mismatch_positions[(nmismatches_left-1)/2] > max_terminal_length) {
+ debug4t(printf(" => Short terminal at left: nmismatches_left %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
+ nmismatches_left,max_mismatches_allowed,mismatch_positions[(nmismatches_left-1)/2],max_terminal_length));
if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*//*truncate_pos_left*/querylength,
/*left*/segment_left,query_compress_fwd,
querylength,/*plusp*/true,genestrand,first_read_p,
- start_endtype,end_endtype,segment->chrnum,segment->chroffset,
+ /*start_endtype*/END,/*end_endtype*/TERM,
+ segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength,max_mismatches_allowed,
/*sarrayp*/false)) != NULL) {
debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- plus_terminals_middle = List_push(plus_terminals_middle,(void *) hit);
- nterminals_middle += 1;
+ plus_terminals_left = List_push(plus_terminals_left,(void *) hit);
+ nterminals_left += 1;
} else {
debug4t(printf(" => no"));
}
+ debug4t(printf("\n"));
+
}
- debug4t(printf("\n"));
+ }
+ if (nterminals_right >= MAX_NTERMINALS) {
+ /* Skip */
+ } else if (segment->floor_right > max_mismatches_allowed) {
+ debug4t(printf("Not checking right because floor_right %d > max_mismatches_allowed %d\n",
+ segment->floor_right,max_mismatches_allowed));
} else {
-#endif
-
- if (nterminals_left >= MAX_NTERMINALS) {
- /* Skip */
- } else if (segment->floor_left > max_mismatches_allowed) {
- debug4t(printf("Not checking left because floor_left %d > max_mismatches_allowed %d\n",
- segment->floor_left,max_mismatches_allowed));
- } else {
- /* Check from left */
- debug4t(printf("Checking left because floor_left %d <= max_mismatches_allowed %d\n",
- segment->floor_left,max_mismatches_allowed));
-
- nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
- query_compress_fwd,/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ /* Check from right */
+ debug4t(printf("Checking right because floor_right %d <= max_mismatches_allowed %d\n",
+ segment->floor_right,max_mismatches_allowed));
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
+ /*query_compress*/query_compress_fwd,
+ /*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
/*plusp*/true,genestrand,first_read_p);
- debug4t(
- printf("%d mismatches on left at:",nmismatches_left);
- for (i = 0; i <= nmismatches_left; i++) {
- printf(" %d",mismatch_positions[i]);
- }
- printf("\n");
- );
-
- if (nmismatches_left == 0 || nmismatches_left <= max_mismatches_allowed ||
- mismatch_positions[nmismatches_left-1] > querylength - max_terminal_length) {
- debug4t(printf(" => Long terminal at left: nmismatches_left %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
- nmismatches_left,max_mismatches_allowed,mismatch_positions[nmismatches_left-1],querylength - max_terminal_length));
- if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*//*truncate_pos_left*/querylength,
- /*left*/segment_left,query_compress_fwd,
- querylength,/*plusp*/true,genestrand,first_read_p,
- /*start_endtype*/END,/*end_endtype*/TERM,
- segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength,max_mismatches_allowed,
- /*sarrayp*/false)) != NULL) {
- debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- plus_terminals_left = List_push(plus_terminals_left,(void *) hit);
- nterminals_left += 1;
- } else {
- debug4t(printf(" => no"));
- }
- debug4t(printf("\n"));
-
- } else if (mismatch_positions[(nmismatches_left-1)/2] > max_terminal_length) {
- debug4t(printf(" => Short terminal at left: nmismatches_left %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
- nmismatches_left,max_mismatches_allowed,mismatch_positions[(nmismatches_left-1)/2],max_terminal_length));
-
- if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*//*truncate_pos_left*/querylength,
- /*left*/segment_left,query_compress_fwd,
- querylength,/*plusp*/true,genestrand,first_read_p,
- /*start_endtype*/END,/*end_endtype*/TERM,
- segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength,max_mismatches_allowed,
- /*sarrayp*/false)) != NULL) {
- debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- plus_terminals_left = List_push(plus_terminals_left,(void *) hit);
- nterminals_left += 1;
- } else {
- debug4t(printf(" => no"));
- }
- debug4t(printf("\n"));
-
+ debug4t(
+ printf("%d mismatches on right at:",nmismatches_right);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+ debug4t(printf("last mismatch %d, half mismatch %d, long terminalpos %d, short terminalpos %d\n",
+ mismatch_positions[nmismatches_right-1],mismatch_positions[(nmismatches_right-1)/2],
+ max_terminal_length,querylength - max_terminal_length));
+
+ if (nmismatches_right == 0 || nmismatches_right <= max_mismatches_allowed ||
+ mismatch_positions[nmismatches_right-1] < max_terminal_length) {
+ debug4t(printf(" => Long terminal at right: nmismatches_right %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
+ nmismatches_right,max_mismatches_allowed,mismatch_positions[nmismatches_right-1],max_terminal_length));
+ if ((hit = Stage3end_new_terminal(/*querystart*//*truncate_pos_right*/0,/*queryend*/querylength,
+ /*left*/segment_left,query_compress_fwd,
+ querylength,/*plusp*/true,genestrand,first_read_p,
+ /*start_endtype*/TERM,/*end_endtype*/END,
+ segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength,max_mismatches_allowed,
+ /*sarrayp*/false)) != NULL) {
+ debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
+ plus_terminals_right = List_push(plus_terminals_right,(void *) hit);
+ nterminals_right += 1;
+ } else {
+ debug4t(printf(" => no"));
}
- }
+ debug4t(printf("\n"));
- if (nterminals_right >= MAX_NTERMINALS) {
- /* Skip */
- } else if (segment->floor_right > max_mismatches_allowed) {
- debug4t(printf("Not checking right because floor_right %d > max_mismatches_allowed %d\n",
- segment->floor_right,max_mismatches_allowed));
- } else {
- /* Check from right */
- debug4t(printf("Checking right because floor_right %d <= max_mismatches_allowed %d\n",
- segment->floor_right,max_mismatches_allowed));
- nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
- /*query_compress*/query_compress_fwd,
- /*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
- /*plusp*/true,genestrand,first_read_p);
-
- debug4t(
- printf("%d mismatches on right at:",nmismatches_right);
- for (i = 0; i <= nmismatches_right; i++) {
- printf(" %d",mismatch_positions[i]);
- }
- printf("\n");
- );
-
- debug4t(printf("last mismatch %d, half mismatch %d, long terminalpos %d, short terminalpos %d\n",
- mismatch_positions[nmismatches_right-1],mismatch_positions[(nmismatches_right-1)/2],
- max_terminal_length,querylength - max_terminal_length));
-
- if (nmismatches_right == 0 || nmismatches_right <= max_mismatches_allowed ||
- mismatch_positions[nmismatches_right-1] < max_terminal_length) {
- debug4t(printf(" => Long terminal at right: nmismatches_right %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
- nmismatches_right,max_mismatches_allowed,mismatch_positions[nmismatches_right-1],max_terminal_length));
- if ((hit = Stage3end_new_terminal(/*querystart*//*truncate_pos_right*/0,/*queryend*/querylength,
- /*left*/segment_left,query_compress_fwd,
- querylength,/*plusp*/true,genestrand,first_read_p,
- /*start_endtype*/TERM,/*end_endtype*/END,
- segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength,max_mismatches_allowed,
- /*sarrayp*/false)) != NULL) {
- debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- plus_terminals_right = List_push(plus_terminals_right,(void *) hit);
- nterminals_right += 1;
- } else {
- debug4t(printf(" => no"));
- }
- debug4t(printf("\n"));
-
- } else if (mismatch_positions[(nmismatches_right-1)/2] < querylength - max_terminal_length) {
- debug4t(printf(" => Short terminal at right: nmismatches_right %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
- nmismatches_right,max_mismatches_allowed,mismatch_positions[(nmismatches_right-1)/2],querylength-max_terminal_length));
- if ((hit = Stage3end_new_terminal(/*querystart*//*truncate_pos_right*/0,/*queryend*/querylength,
- /*left*/segment_left,query_compress_fwd,
- querylength,/*plusp*/true,genestrand,first_read_p,
- /*start_endtype*/TERM,/*end_endtype*/END,
- segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength,max_mismatches_allowed,
- /*sarrayp*/false)) != NULL) {
- debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- plus_terminals_right = List_push(plus_terminals_right,(void *) hit);
- nterminals_right += 1;
- } else {
- debug4t(printf(" => no"));
- }
- debug4t(printf("\n"));
+ } else if (mismatch_positions[(nmismatches_right-1)/2] < querylength - max_terminal_length) {
+ debug4t(printf(" => Short terminal at right: nmismatches_right %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
+ nmismatches_right,max_mismatches_allowed,mismatch_positions[(nmismatches_right-1)/2],querylength-max_terminal_length));
+ if ((hit = Stage3end_new_terminal(/*querystart*//*truncate_pos_right*/0,/*queryend*/querylength,
+ /*left*/segment_left,query_compress_fwd,
+ querylength,/*plusp*/true,genestrand,first_read_p,
+ /*start_endtype*/TERM,/*end_endtype*/END,
+ segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength,max_mismatches_allowed,
+ /*sarrayp*/false)) != NULL) {
+ debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
+ plus_terminals_right = List_push(plus_terminals_right,(void *) hit);
+ nterminals_right += 1;
+ } else {
+ debug4t(printf(" => no"));
+ }
+ debug4t(printf("\n"));
- }
}
-#ifdef ALLOW_MIDDLE_ALIGNMENTS
}
-#endif
+#ifdef ALLOW_MIDDLE_ALIGNMENTS
}
+#endif
}
+ }
- if (nterminals_middle >= MAX_NTERMINALS) {
- for (p = plus_terminals_middle; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
- Stage3end_free(&hit);
- }
- List_free(&plus_terminals_middle);
- plus_terminals_middle = (List_T) NULL;
+ if (nterminals_middle >= MAX_NTERMINALS) {
+ for (p = plus_terminals_middle; p != NULL; p = p->rest) {
+ hit = (Stage3end_T) p->first;
+ Stage3end_free(&hit);
}
+ List_free(&plus_terminals_middle);
+ plus_terminals_middle = (List_T) NULL;
+ }
- if (nterminals_left >= MAX_NTERMINALS) {
- for (p = plus_terminals_left; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
- Stage3end_free(&hit);
- }
- List_free(&plus_terminals_left);
- plus_terminals_left = (List_T) NULL;
+ if (nterminals_left >= MAX_NTERMINALS) {
+ for (p = plus_terminals_left; p != NULL; p = p->rest) {
+ hit = (Stage3end_T) p->first;
+ Stage3end_free(&hit);
}
+ List_free(&plus_terminals_left);
+ plus_terminals_left = (List_T) NULL;
+ }
- if (nterminals_right >= MAX_NTERMINALS) {
- for (p = plus_terminals_right; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
- Stage3end_free(&hit);
- }
- List_free(&plus_terminals_right);
- plus_terminals_right = (List_T) NULL;
+ if (nterminals_right >= MAX_NTERMINALS) {
+ for (p = plus_terminals_right; p != NULL; p = p->rest) {
+ hit = (Stage3end_T) p->first;
+ Stage3end_free(&hit);
}
+ List_free(&plus_terminals_right);
+ plus_terminals_right = (List_T) NULL;
}
- if (minus_nsegments > 0) {
- nterminals_left = nterminals_right = nterminals_middle = 0;
- for (segment = minus_segments; (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS) &&
- segment < &(minus_segments[minus_nsegments]); segment++) {
- if (0 && segment->usedp == true) {
- /* Previously skipped, but looks like a bad idea */
- debug4t(printf("segment used\n"));
- } else if (segment->diagonal < (Univcoord_T) -1) {
- debug4t(printf("minus: %llu, %d..%d\n",(unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3));
- segment_left = segment->diagonal - querylength;
- debug4t(printf("identify_terminals_minus: Getting genome at diagonal %llu (querypos %d..%d) + 12 - querylength %d = %llu\n",
- (unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3,querylength,
- (unsigned long long) segment_left));
- debug4t(
- gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
- Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
- printf("genome 0..: %s\n",gbuffer);
- printf("query.rc 0..: %s\n",queryrc);
- FREE(gbuffer);
- );
+ nterminals_left = nterminals_right = nterminals_middle = 0;
+ for (p = minus_anchor_segments; p != NULL && (/*nterminals_middle < MAX_NTERMINALS ||*/ nterminals_left < MAX_NTERMINALS || nterminals_right < MAX_NTERMINALS);
+ p = List_next(p)) {
+ segment = (Segment_T) List_head(p);
+ if (0 && segment->usedp == true) {
+ /* Previously skipped, but looks like a bad idea */
+ debug4t(printf("segment used\n"));
+ } else if (segment->diagonal < (Univcoord_T) -1) {
+ debug4t(printf("minus: %llu, %d..%d\n",(unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3));
+ segment_left = segment->diagonal - querylength;
+ debug4t(printf("identify_terminals_minus: Getting genome at diagonal %llu (querypos %d..%d) + 12 - querylength %d = %llu\n",
+ (unsigned long long) segment->diagonal,segment->querypos5,segment->querypos3,querylength,
+ (unsigned long long) segment_left));
+ debug4t(
+ gbuffer = (char *) CALLOC(querylength+1,sizeof(char));
+ Genome_fill_buffer_blocks(segment_left,querylength,gbuffer);
+ printf("genome 0..: %s\n",gbuffer);
+ /* printf("query.rc 0..: %s\n",queryrc); */
+ FREE(gbuffer);
+ );
#ifdef ALLOW_MIDDLE_ALIGNMENTS
- if (segment->querypos3 - segment->querypos5 > max_terminal_length /* was index1part */) {
- /* Check for a middle section */
- debug4t(printf(" => ? Middle alignment based on querypos3 %d - querypos5 %d > max_terminal_length %d",
- segment->querypos3,segment->querypos5,max_terminal_length));
- if (nterminals_middle >= MAX_NTERMINALS) {
- /* Skip */
- debug4t(printf(" => Skipping because too many nterminals_middle"));
- } else {
- start_endtype = (segment->querypos5 < index1interval) ? END : TERM;
- end_endtype = (segment->querypos3 >= query_lastpos - index1interval) ? END : TERM;
- debug4t(printf(" querypos3 %d vs index1interval %d => start_endtype %s\n",
- segment->querypos3,index1interval,Endtype_string(start_endtype)));
- debug4t(printf(" querypos5 %d vs query_lastpos %d - index1interval %d => end_endtype %s\n",
- segment->querypos5,query_lastpos,index1interval,Endtype_string(end_endtype)));
+ if (segment->querypos3 - segment->querypos5 > max_terminal_length /* was index1part */) {
+ /* Check for a middle section */
+ debug4t(printf(" => ? Middle alignment based on querypos3 %d - querypos5 %d > max_terminal_length %d",
+ segment->querypos3,segment->querypos5,max_terminal_length));
+ if (nterminals_middle >= MAX_NTERMINALS) {
+ /* Skip */
+ debug4t(printf(" => Skipping because too many nterminals_middle"));
+ } else {
+ start_endtype = (segment->querypos5 < index1interval) ? END : TERM;
+ end_endtype = (segment->querypos3 >= query_lastpos - index1interval) ? END : TERM;
+ debug4t(printf(" querypos3 %d vs index1interval %d => start_endtype %s\n",
+ segment->querypos3,index1interval,Endtype_string(start_endtype)));
+ debug4t(printf(" querypos5 %d vs query_lastpos %d - index1interval %d => end_endtype %s\n",
+ segment->querypos5,query_lastpos,index1interval,Endtype_string(end_endtype)));
- if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*/querylength,
+ if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*/querylength,
+ /*left*/segment_left,query_compress_rev,
+ querylength,/*plusp*/false,genestrand,first_read_p,
+ start_endtype,end_endtype,segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength,max_mismatches_allowed,
+ /*sarrayp*/false)) != NULL) {
+ debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
+ minus_terminals_middle = List_push(minus_terminals_middle,(void *) hit);
+ nterminals_middle += 1;
+ } else {
+ debug4t(printf(" => no"));
+ }
+ }
+ debug4t(printf("\n"));
+
+ } else {
+#endif
+
+ /* Need to reverse floor_left and floor_right */
+ if (nterminals_left >= MAX_NTERMINALS) {
+ /* Skip */
+ } else if (segment->floor_right > max_mismatches_allowed) {
+ debug4t(printf("Not checking left because floor_right %d > max_mismatches_allowed %d\n",
+ segment->floor_right,max_mismatches_allowed));
+ } else {
+ /* Check from left */
+ debug4t(printf("Checking left because floor_right %d <= max_mismatches_allowed %d\n",
+ segment->floor_right,max_mismatches_allowed));
+ nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ /*query_compress*/query_compress_rev,
+ /*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
+ /*plusp*/false,genestrand,first_read_p);
+
+ debug4t(
+ printf("%d mismatches on left at:",nmismatches_left);
+ for (i = 0; i <= nmismatches_left; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
+
+ if (nmismatches_left == 0 || nmismatches_left <= max_mismatches_allowed ||
+ mismatch_positions[nmismatches_left-1] > querylength - max_terminal_length) {
+ debug4t(printf(" => Long terminal at left: nmismatches_left %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
+ nmismatches_left,max_mismatches_allowed,mismatch_positions[nmismatches_left-1],querylength - max_terminal_length));
+ if ((hit = Stage3end_new_terminal(/*querystart*//*querylength-truncate_pos_left*/0,/*queryend*/querylength,
+ /*left*/segment_left,query_compress_rev,
+ querylength,/*plusp*/false,genestrand,first_read_p,
+ /*start_endtype*/TERM,/*end_endtype*/END,
+ segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength,max_mismatches_allowed,
+ /*sarrayp*/false)) != NULL) {
+ debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
+ minus_terminals_left = List_push(minus_terminals_left,(void *) hit);
+ nterminals_left += 1;
+ } else {
+ debug4t(printf(" => no"));
+ }
+ debug4t(printf("\n"));
+
+ } else if (mismatch_positions[(nmismatches_left-1)/2] > max_terminal_length) {
+ debug4t(printf(" => Short terminal at left: nmismatches_left %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
+ nmismatches_left,max_mismatches_allowed,mismatch_positions[(nmismatches_left-1)/2],max_terminal_length));
+ if ((hit = Stage3end_new_terminal(/*querystart*//*querylength-truncate_pos_left*/0,/*queryend*/querylength,
/*left*/segment_left,query_compress_rev,
querylength,/*plusp*/false,genestrand,first_read_p,
- start_endtype,end_endtype,segment->chrnum,segment->chroffset,
+ /*start_endtype*/TERM,/*end_endtype*/END,
+ segment->chrnum,segment->chroffset,
segment->chrhigh,segment->chrlength,max_mismatches_allowed,
/*sarrayp*/false)) != NULL) {
debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- minus_terminals_middle = List_push(minus_terminals_middle,(void *) hit);
- nterminals_middle += 1;
+ minus_terminals_left = List_push(minus_terminals_left,(void *) hit);
+ nterminals_left += 1;
} else {
debug4t(printf(" => no"));
}
+ debug4t(printf("\n"));
}
- debug4t(printf("\n"));
+ }
+ if (nterminals_right >= MAX_NTERMINALS) {
+ /* Skip */
+ } else if (segment->floor_left > max_mismatches_allowed) {
+ debug4t(printf("Not checking right because floor_left %d > max_mismatches_allowed %d\n",
+ segment->floor_left,max_mismatches_allowed));
} else {
-#endif
-
- /* Need to reverse floor_left and floor_right */
- if (nterminals_left >= MAX_NTERMINALS) {
- /* Skip */
- } else if (segment->floor_right > max_mismatches_allowed) {
- debug4t(printf("Not checking left because floor_right %d > max_mismatches_allowed %d\n",
- segment->floor_right,max_mismatches_allowed));
- } else {
- /* Check from left */
- debug4t(printf("Checking left because floor_right %d <= max_mismatches_allowed %d\n",
- segment->floor_right,max_mismatches_allowed));
- nmismatches_left = Genome_mismatches_left(mismatch_positions,max_mismatches_allowed,
+ /* Check from right */
+ debug4t(printf("Checking right because floor_left %d <= max_mismatches_allowed %d\n",
+ segment->floor_left,max_mismatches_allowed));
+ nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
/*query_compress*/query_compress_rev,
/*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
/*plusp*/false,genestrand,first_read_p);
- debug4t(
- printf("%d mismatches on left at:",nmismatches_left);
- for (i = 0; i <= nmismatches_left; i++) {
- printf(" %d",mismatch_positions[i]);
- }
- printf("\n");
- );
+ debug4t(
+ printf("%d mismatches on right at:",nmismatches_right);
+ for (i = 0; i <= nmismatches_right; i++) {
+ printf(" %d",mismatch_positions[i]);
+ }
+ printf("\n");
+ );
- if (nmismatches_left == 0 || nmismatches_left <= max_mismatches_allowed ||
- mismatch_positions[nmismatches_left-1] > querylength - max_terminal_length) {
- debug4t(printf(" => Long terminal at left: nmismatches_left %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
- nmismatches_left,max_mismatches_allowed,mismatch_positions[nmismatches_left-1],querylength - max_terminal_length));
- if ((hit = Stage3end_new_terminal(/*querystart*//*querylength-truncate_pos_left*/0,/*queryend*/querylength,
- /*left*/segment_left,query_compress_rev,
- querylength,/*plusp*/false,genestrand,first_read_p,
- /*start_endtype*/TERM,/*end_endtype*/END,
- segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength,max_mismatches_allowed,
- /*sarrayp*/false)) != NULL) {
- debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- minus_terminals_left = List_push(minus_terminals_left,(void *) hit);
- nterminals_left += 1;
- } else {
- debug4t(printf(" => no"));
- }
- debug4t(printf("\n"));
-
- } else if (mismatch_positions[(nmismatches_left-1)/2] > max_terminal_length) {
- debug4t(printf(" => Short terminal at left: nmismatches_left %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
- nmismatches_left,max_mismatches_allowed,mismatch_positions[(nmismatches_left-1)/2],max_terminal_length));
- if ((hit = Stage3end_new_terminal(/*querystart*//*querylength-truncate_pos_left*/0,/*queryend*/querylength,
- /*left*/segment_left,query_compress_rev,
- querylength,/*plusp*/false,genestrand,first_read_p,
- /*start_endtype*/TERM,/*end_endtype*/END,
- segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength,max_mismatches_allowed,
- /*sarrayp*/false)) != NULL) {
- debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- minus_terminals_left = List_push(minus_terminals_left,(void *) hit);
- nterminals_left += 1;
- } else {
- debug4t(printf(" => no"));
- }
- debug4t(printf("\n"));
+ if (nmismatches_right == 0 || nmismatches_right <= max_mismatches_allowed ||
+ mismatch_positions[nmismatches_right-1] < max_terminal_length) {
+ debug4t(printf(" => Long terminal at right: nmismatches_right %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
+ nmismatches_right,max_mismatches_allowed,mismatch_positions[nmismatches_right-1],max_terminal_length));
+ if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*//*querylength-truncate_pos_right*/querylength,
+ /*left*/segment_left,query_compress_rev,
+ querylength,/*plusp*/false,genestrand,first_read_p,
+ /*start_endtype*/END,/*end_endtype*/TERM,
+ segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength,max_mismatches_allowed,
+ /*sarrayp*/false)) != NULL) {
+ debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
+ minus_terminals_right = List_push(minus_terminals_right,(void *) hit);
+ nterminals_right += 1;
+ } else {
+ debug4t(printf(" => no"));
}
- }
-
- if (nterminals_right >= MAX_NTERMINALS) {
- /* Skip */
- } else if (segment->floor_left > max_mismatches_allowed) {
- debug4t(printf("Not checking right because floor_left %d > max_mismatches_allowed %d\n",
- segment->floor_left,max_mismatches_allowed));
- } else {
- /* Check from right */
- debug4t(printf("Checking right because floor_left %d <= max_mismatches_allowed %d\n",
- segment->floor_left,max_mismatches_allowed));
- nmismatches_right = Genome_mismatches_right(mismatch_positions,max_mismatches_allowed,
- /*query_compress*/query_compress_rev,
- /*left*/segment_left,/*pos5*/0,/*pos3*/querylength,
- /*plusp*/false,genestrand,first_read_p);
-
- debug4t(
- printf("%d mismatches on right at:",nmismatches_right);
- for (i = 0; i <= nmismatches_right; i++) {
- printf(" %d",mismatch_positions[i]);
- }
- printf("\n");
- );
+ debug4t(printf("\n"));
- if (nmismatches_right == 0 || nmismatches_right <= max_mismatches_allowed ||
- mismatch_positions[nmismatches_right-1] < max_terminal_length) {
- debug4t(printf(" => Long terminal at right: nmismatches_right %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
- nmismatches_right,max_mismatches_allowed,mismatch_positions[nmismatches_right-1],max_terminal_length));
- if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*//*querylength-truncate_pos_right*/querylength,
- /*left*/segment_left,query_compress_rev,
- querylength,/*plusp*/false,genestrand,first_read_p,
- /*start_endtype*/END,/*end_endtype*/TERM,
- segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength,max_mismatches_allowed,
- /*sarrayp*/false)) != NULL) {
- debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- minus_terminals_right = List_push(minus_terminals_right,(void *) hit);
- nterminals_right += 1;
- } else {
- debug4t(printf(" => no"));
- }
- debug4t(printf("\n"));
-
- } else if (mismatch_positions[(nmismatches_right-1)/2] < querylength - max_terminal_length) {
- debug4t(printf(" => Short terminal at right: nmismatches_right %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
- nmismatches_right,max_mismatches_allowed,mismatch_positions[(nmismatches_right-1)/2],querylength-max_terminal_length));
- if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*//*querylength-truncate_pos_right*/querylength,
- /*left*/segment_left,query_compress_rev,
- querylength,/*plusp*/false,genestrand,first_read_p,
- /*start_endtype*/END,/*end_endtype*/TERM,
- segment->chrnum,segment->chroffset,
- segment->chrhigh,segment->chrlength,max_mismatches_allowed,
- /*sarrayp*/false)) != NULL) {
- debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
- minus_terminals_right = List_push(minus_terminals_right,(void *) hit);
- nterminals_right += 1;
- } else {
- debug4t(printf(" => no"));
- }
- debug4t(printf("\n"));
+ } else if (mismatch_positions[(nmismatches_right-1)/2] < querylength - max_terminal_length) {
+ debug4t(printf(" => Short terminal at right: nmismatches_right %d vs max_mismatches_allowed %d, last mismatch %d vs terminal pos %d",
+ nmismatches_right,max_mismatches_allowed,mismatch_positions[(nmismatches_right-1)/2],querylength-max_terminal_length));
+ if ((hit = Stage3end_new_terminal(/*querystart*/0,/*queryend*//*querylength-truncate_pos_right*/querylength,
+ /*left*/segment_left,query_compress_rev,
+ querylength,/*plusp*/false,genestrand,first_read_p,
+ /*start_endtype*/END,/*end_endtype*/TERM,
+ segment->chrnum,segment->chroffset,
+ segment->chrhigh,segment->chrlength,max_mismatches_allowed,
+ /*sarrayp*/false)) != NULL) {
+ debug4t(printf(" => yes, with %d matches",Stage3end_nmatches_posttrim(hit)));
+ minus_terminals_right = List_push(minus_terminals_right,(void *) hit);
+ nterminals_right += 1;
+ } else {
+ debug4t(printf(" => no"));
}
+ debug4t(printf("\n"));
}
-#ifdef ALLOW_MIDDLE_ALIGNMENTS
}
-#endif
- }
- }
-
- if (nterminals_middle >= MAX_NTERMINALS) {
- for (p = minus_terminals_middle; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
- Stage3end_free(&hit);
+#ifdef ALLOW_MIDDLE_ALIGNMENTS
}
- List_free(&minus_terminals_middle);
- minus_terminals_middle = (List_T) NULL;
+#endif
}
+ }
- if (nterminals_left >= MAX_NTERMINALS) {
- for (p = minus_terminals_left; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
- Stage3end_free(&hit);
- }
- List_free(&minus_terminals_left);
- minus_terminals_left = (List_T) NULL;
+ if (nterminals_middle >= MAX_NTERMINALS) {
+ for (p = minus_terminals_middle; p != NULL; p = p->rest) {
+ hit = (Stage3end_T) p->first;
+ Stage3end_free(&hit);
}
+ List_free(&minus_terminals_middle);
+ minus_terminals_middle = (List_T) NULL;
+ }
- if (nterminals_right >= MAX_NTERMINALS) {
- for (p = minus_terminals_right; p != NULL; p = p->rest) {
- hit = (Stage3end_T) p->first;
- Stage3end_free(&hit);
- }
- List_free(&minus_terminals_right);
- minus_terminals_right = (List_T) NULL;
+ if (nterminals_left >= MAX_NTERMINALS) {
+ for (p = minus_terminals_left; p != NULL; p = p->rest) {
+ hit = (Stage3end_T) p->first;
+ Stage3end_free(&hit);
}
+ List_free(&minus_terminals_left);
+ minus_terminals_left = (List_T) NULL;
}
- debug4t(printf("Total number of terminals: %d\n",List_length(terminals)));
+ if (nterminals_right >= MAX_NTERMINALS) {
+ for (p = minus_terminals_right; p != NULL; p = p->rest) {
+ hit = (Stage3end_T) p->first;
+ Stage3end_free(&hit);
+ }
+ List_free(&minus_terminals_right);
+ minus_terminals_right = (List_T) NULL;
+ }
return List_append(plus_terminals_middle,
List_append(plus_terminals_left,
@@ -10691,21 +10869,397 @@ intragenic_splice_p (Chrpos_T splicedistance, Substring_T donor, Substring_T acc
return true;
}
}
-
- return false;
+
+ return false;
+}
+
+
+
+/* Builds candidate splices for DNA-style junctions by pairing a start
+ * fragment with an end fragment that share the same chimera position.
+ *
+ * For every split of nmismatches_allowed into nmismatches1 + nmismatches2,
+ * the lists startfrags_*[nmismatches1] and endfrags_*[nmismatches2] are
+ * walked in lockstep on Substring_chimera_pos.  The walk advances p when
+ * its position is smaller and q when it is larger, so it presumably relies
+ * on both lists being in ascending chimera_pos order (confirm against the
+ * code that builds them).
+ *
+ * Same-strand pairs (plus/plus, minus/minus) whose genomic distance is
+ * within shortsplicedist, or which intragenic_splice_p accepts when
+ * distances_observed_p is set, are pushed onto *localsplicing.  All other
+ * pairs, including every different-strand pair, are pushed onto a new
+ * distant list and counted in *ndistantsplicepairs.
+ *
+ * No splice-site probabilities exist here, so donor_prob and acceptor_prob
+ * are passed as 0.0 and sensedir as SENSE_NULL.
+ *
+ * Returns distantsplicing_orig with the new distant candidates appended,
+ * or distantsplicing_orig alone (new candidates freed) when
+ * *ndistantsplicepairs ends up above MAXCHIMERAPATHS.  *found_score is
+ * passed through to Stage3end_new_splice.
+ */
+static List_T
+find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
+                              List_T *localsplicing, List_T distantsplicing_orig,
+                              List_T *startfrags_plus, List_T *endfrags_plus,
+                              List_T *startfrags_minus, List_T *endfrags_minus,
+                              int localsplicing_penalty, int distantsplicing_penalty,
+                              int querylength, int nmismatches_allowed, bool first_read_p) {
+  List_T distantsplicing = NULL, p, q, qsave;
+  Substring_T startfrag, endfrag;
+  int min_endlength_1, min_endlength_2, nmismatches1, nmismatches2, pos;
+  Chrpos_T distance;
+  Univcoord_T startfrag_genomicstart, endfrag_genomicstart;
+  bool shortdistancep;
+  double nonidentity = 1.0 - min_distantsplicing_identity;
+  Chrnum_T chrnum;
+
+  debug(printf("Starting find_splicepairs_distant_dna with nonidentity %f\n",nonidentity));
+  debug4l(printf("Starting find_splicepairs_distant_dna with nonidentity %f\n",nonidentity));
+
+  /* Perfect identity required: only the 0+0 mismatch split is examined */
+  if (nonidentity == 0.0) {
+    nmismatches_allowed = 0;
+  }
+
+  for (nmismatches1 = 0; nmismatches1 <= nmismatches_allowed; nmismatches1++) {
+    nmismatches2 = nmismatches_allowed - nmismatches1;
+
+    /* Each end must be long enough to justify its share of mismatches,
+       and never shorter than min_distantsplicing_end_matches */
+    if (nonidentity == 0.0) {
+      min_endlength_1 = min_endlength_2 = min_distantsplicing_end_matches;
+    } else {
+      min_endlength_1 = rint((double) nmismatches1/nonidentity);
+      if (min_endlength_1 < min_distantsplicing_end_matches) {
+        min_endlength_1 = min_distantsplicing_end_matches;
+      }
+      min_endlength_2 = rint((double) nmismatches2/nonidentity);
+      if (min_endlength_2 < min_distantsplicing_end_matches) {
+        min_endlength_2 = min_distantsplicing_end_matches;
+      }
+    }
+
+    debug4l(printf(" nmismatches1 = %d, nmismatches2 = %d, min_endlength_1 = %d, min_endlength_2 = %d\n",
+                   nmismatches1,nmismatches2,min_endlength_1,min_endlength_2));
+
+    /************************************************************************
+     * Same strands
+     ************************************************************************/
+
+    /* 1. End 1 to End 2. Same strands. */
+    p = startfrags_plus[nmismatches1];
+    q = endfrags_plus[nmismatches2];
+    debug4l(printf("find_splicepairs_distant_dna (%d+%d mismatches): startfrags+ (%d) to endfrags+ (%d)\n",
+                   nmismatches1,nmismatches2,List_length(p),List_length(q)));
+    while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
+      startfrag = (Substring_T) p->first;
+      endfrag = (Substring_T) q->first;
+      debug4ld(printf("end1-end2: startfrag at %llu and endfrag at %llu\n",
+                      (unsigned long long) Substring_genomicstart(startfrag),(unsigned long long) Substring_genomicstart(endfrag)));
+
+      if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
+        debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
+        p = p->rest;
+      } else if (pos > querylength - min_endlength_2) {
+        debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n"));
+        p = p->rest;
+      } else if (pos < Substring_chimera_pos(endfrag)) {
+        debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+        p = p->rest;
+      } else if (pos > Substring_chimera_pos(endfrag)) {
+        debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+        q = q->rest;
+      } else {
+        /* Generate all pairs at this splice_pos */
+        qsave = q;
+        while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+          startfrag = (Substring_T) p->first;
+          debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos));
+          q = qsave;
+          while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+            endfrag = (Substring_T) q->first;
+            debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos));
+            if (Substring_genomicstart(endfrag) == Substring_genomicstart(startfrag)) {
+              /* Skip. Really a continuous match. */
+            } else {
+              if ((chrnum = Substring_chrnum(startfrag)) != Substring_chrnum(endfrag)) {
+                distance = 0U;
+                shortdistancep = false;
+              } else if ((endfrag_genomicstart = Substring_genomicstart(endfrag)) > (startfrag_genomicstart = Substring_genomicstart(startfrag))) {
+                /* Plus strand: end fragment downstream of start fragment is the collinear order */
+                distance = endfrag_genomicstart - startfrag_genomicstart;
+                if (distance <= shortsplicedist) {
+                  shortdistancep = true;
+                } else if (distances_observed_p == true &&
+                           intragenic_splice_p(distance,startfrag,endfrag) == true) {
+                  shortdistancep = true;
+                } else {
+                  shortdistancep = false;
+                }
+              } else {
+                /* Both genomicstart locals were assigned while evaluating the condition above */
+                distance = startfrag_genomicstart - endfrag_genomicstart;
+                shortdistancep = false; /* scramble */
+              }
+              debug4ld(printf("1-2. Pushing a candidate at splice_pos %d (%d..%d), startfrag %llu to endfrag %llu. shortdistancep = %d\n",
+                              pos,min_endlength_1,querylength-min_endlength_2,
+                              (unsigned long long) Substring_genomicstart(startfrag),
+                              (unsigned long long) Substring_genomicstart(endfrag),shortdistancep));
+
+              if (shortdistancep) {
+                *localsplicing = List_push(*localsplicing,
+                                           (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+                                                                         startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+                                                                         /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
+                                                                         /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+                                                                         /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+                                                                         /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+                                                                         /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+                                                                         /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+                                                                         /*sensedir*/SENSE_NULL,/*sarrayp*/false));
+              } else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
+                /* "<=" lets the count reach MAXCHIMERAPATHS+1, which the final check uses to detect overflow */
+                distantsplicing = List_push(distantsplicing,
+                                            (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+                                                                          startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+                                                                          /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
+                                                                          /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+                                                                          /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+                                                                          /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+                                                                          /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+                                                                          /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+                                                                          /*sensedir*/SENSE_NULL,/*sarrayp*/false));
+                (*ndistantsplicepairs)++;
+              }
+
+            }
+            q = q->rest;
+
+          }
+          p = p->rest;
+        }
+      }
+    }
+
+    /* 4. End 3 to End 4. Same strands. */
+    p = startfrags_minus[nmismatches1];
+    q = endfrags_minus[nmismatches2];
+    debug4l(printf("find_splicepairs_distant_dna (%d+%d mismatches): startfrags- (%d) to endfrags- (%d)\n",
+                   nmismatches1,nmismatches2,List_length(p),List_length(q)));
+    while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
+      startfrag = (Substring_T) p->first;
+      endfrag = (Substring_T) q->first;
+      debug4ld(printf("end3-end4: startfrag at %llu and endfrag at %llu\n",
+                      (unsigned long long) Substring_genomicstart(startfrag),
+                      (unsigned long long) Substring_genomicstart(endfrag)));
+
+      if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
+        debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
+        p = p->rest;
+      } else if (pos > querylength - min_endlength_2) {
+        debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n"));
+        p = p->rest;
+      } else if (pos < Substring_chimera_pos(endfrag)) {
+        debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+        p = p->rest;
+      } else if (pos > Substring_chimera_pos(endfrag)) {
+        debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+        q = q->rest;
+      } else {
+        /* Generate all pairs at this splice_pos */
+        qsave = q;
+        while (p != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+          startfrag = (Substring_T) p->first;
+          debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos));
+          q = qsave;
+          while (q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */ && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+            endfrag = (Substring_T) q->first;
+            debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos));
+            if (Substring_genomicstart(endfrag) == Substring_genomicstart(startfrag)) {
+              /* Skip. Really a continuous match. */
+            } else {
+              if ((chrnum = Substring_chrnum(startfrag)) != Substring_chrnum(endfrag)) {
+                distance = 0U;
+                shortdistancep = false;
+              } else if ((endfrag_genomicstart = Substring_genomicstart(endfrag)) > (startfrag_genomicstart = Substring_genomicstart(startfrag))) {
+                distance = endfrag_genomicstart - startfrag_genomicstart;
+                shortdistancep = false; /* scramble */
+              } else {
+                /* Minus strand: the collinear order is the mirror image of case 1 */
+                distance = startfrag_genomicstart - endfrag_genomicstart;
+                if (distance <= shortsplicedist) {
+                  shortdistancep = true;
+                } else if (distances_observed_p == true &&
+                           intragenic_splice_p(distance,startfrag,endfrag) == true) {
+                  shortdistancep = true;
+                } else {
+                  shortdistancep = false;
+                }
+              }
+              debug4ld(printf("3-4. Pushing a candidate at splice_pos %d (%d..%d), startfrag %llu to endfrag %llu. shortdistancep = %d.\n",
+                              pos,min_endlength_1,querylength-min_endlength_2,
+                              (unsigned long long) Substring_genomicstart(startfrag),
+                              (unsigned long long) Substring_genomicstart(endfrag),shortdistancep));
+              if (shortdistancep) {
+                *localsplicing = List_push(*localsplicing,
+                                           (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+                                                                         startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+                                                                         /*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
+                                                                         /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+                                                                         /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+                                                                         /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+                                                                         /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+                                                                         /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+                                                                         /*sensedir*/SENSE_NULL,/*sarrayp*/false));
+              } else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
+                distantsplicing = List_push(distantsplicing,
+                                            (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+                                                                          startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+                                                                          /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
+                                                                          /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+                                                                          /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+                                                                          /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+                                                                          /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+                                                                          /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+                                                                          /*sensedir*/SENSE_NULL,/*sarrayp*/false));
+                (*ndistantsplicepairs)++;
+              }
+            }
+            q = q->rest;
+
+          }
+          p = p->rest;
+        }
+      }
+    }
+
+    /* 5. End 5 to End 6. Same strands. */
+    /* 8. End 7 to End 8. Same strands. */
+
+
+    /************************************************************************
+     * Different strands
+     ************************************************************************/
+
+    /* 2. End 1 to End 4. Different strands. */
+    p = startfrags_plus[nmismatches1];
+    q = endfrags_minus[nmismatches2];
+    debug4l(printf("find_splicepairs_distant_dna (%d+%d mismatches): startfrags+ (%d) to endfrags- (%d)\n",
+                   nmismatches1,nmismatches2,List_length(p),List_length(q)));
+    while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
+      startfrag = (Substring_T) p->first;
+      endfrag = (Substring_T) q->first;
+      debug4ld(printf("end1-end4: startfrag at %llu and endfrag at %llu\n",
+                      (unsigned long long) Substring_genomicstart(startfrag),
+                      (unsigned long long) Substring_genomicstart(endfrag)));
+
+      if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
+        debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
+        p = p->rest;
+      } else if (pos > querylength - min_endlength_2) {
+        debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n"));
+        p = p->rest;
+      } else if (pos < Substring_chimera_pos(endfrag)) {
+        debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+        p = p->rest;
+      } else if (pos > Substring_chimera_pos(endfrag)) {
+        debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+        q = q->rest;
+      } else {
+        /* Generate all pairs at this splice_pos */
+        qsave = q;
+        while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+          startfrag = (Substring_T) p->first;
+          debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos));
+          q = qsave;
+          while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+            endfrag = (Substring_T) q->first;
+            debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos));
+            /* Distance is the absolute difference between the two splice
+               coordinates (start + pos on plus, start - pos on minus) */
+            if (Substring_chrnum(startfrag) != Substring_chrnum(endfrag)) {
+              distance = 0U;
+            } else if ((Substring_genomicstart(endfrag) - pos) > (Substring_genomicstart(startfrag) + pos)) {
+              distance = (Substring_genomicstart(endfrag) - pos) - (Substring_genomicstart(startfrag) + pos);
+            } else {
+              distance = (Substring_genomicstart(startfrag) + pos) - (Substring_genomicstart(endfrag) - pos);
+            }
+            debug4ld(printf("1-4. Pushing a candidate at splice_pos %d (%d..%d), startfrag %llu to endfrag %llu. Different strands, so not shortdistance.\n",
+                            pos,min_endlength_1,querylength-min_endlength_2,
+                            (unsigned long long) Substring_genomicstart(startfrag),
+                            (unsigned long long) Substring_genomicstart(endfrag)));
+            distantsplicing = List_push(distantsplicing,
+                                        (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+                                                                      startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+                                                                      /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
+                                                                      /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+                                                                      /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+                                                                      /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+                                                                      /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+                                                                      /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+                                                                      /*sensedir*/SENSE_NULL,/*sarrayp*/false));
+            (*ndistantsplicepairs)++;
+            q = q->rest;
+          }
+          p = p->rest;
+        }
+      }
+    }
+
+    /* 3. End 3 to End 2. Different strands. */
+    p = startfrags_minus[nmismatches1];
+    q = endfrags_plus[nmismatches2];
+    debug4l(printf("find_splicepairs_distant_dna (%d+%d mismatches): startfrags- (%d) to endfrags+ (%d)\n",
+                   nmismatches1,nmismatches2,List_length(p),List_length(q)));
+    while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
+      startfrag = (Substring_T) p->first;
+      endfrag = (Substring_T) q->first;
+      debug4ld(printf("end3-end2: startfrag at %llu and endfrag at %llu\n",
+                      (unsigned long long) Substring_genomicstart(startfrag),
+                      (unsigned long long) Substring_genomicstart(endfrag)));
+
+      if ((pos = Substring_chimera_pos(startfrag)) < min_endlength_1) {
+        debug4ld(printf("chimera_pos of startfrag < min_endlength_1\n"));
+        p = p->rest;
+      } else if (pos > querylength - min_endlength_2) {
+        debug4ld(printf("chimera_pos of startfrag > querylength - min_endlength_2\n"));
+        p = p->rest;
+      } else if (pos < Substring_chimera_pos(endfrag)) {
+        debug4ld(printf("chimera_pos of startfrag %d < chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+        p = p->rest;
+      } else if (pos > Substring_chimera_pos(endfrag)) {
+        debug4ld(printf("chimera_pos of startfrag %d > chimera_pos of endfrag %d\n",pos,Substring_chimera_pos(endfrag)));
+        q = q->rest;
+      } else {
+        /* Generate all pairs at this splice_pos */
+        qsave = q;
+        while (p != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) p->first)) == pos) {
+          startfrag = (Substring_T) p->first;
+          debug4ld(printf("startfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(startfrag),pos));
+          q = qsave;
+          while (q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS && Substring_chimera_pos(((Substring_T) q->first)) == pos) {
+            endfrag = (Substring_T) q->first;
+            debug4ld(printf("endfrag at %llu, pos %d\n",(unsigned long long) Substring_genomicstart(endfrag),pos));
+            /* Compare the splice coordinates themselves (start - pos on
+               minus, start + pos on plus), as case 2 does.  Comparing the
+               raw genomicstart values would let the unsigned subtraction
+               in the else branch wrap when the fragments lie within
+               2*pos of each other. */
+            if (Substring_chrnum(startfrag) != Substring_chrnum(endfrag)) {
+              distance = 0U;
+            } else if ((Substring_genomicstart(endfrag) + pos) > (Substring_genomicstart(startfrag) - pos)) {
+              distance = (Substring_genomicstart(endfrag) + pos) - (Substring_genomicstart(startfrag) - pos);
+            } else {
+              distance = (Substring_genomicstart(startfrag) - pos) - (Substring_genomicstart(endfrag) + pos);
+            }
+            debug4ld(printf("3-2. Pushing a candidate at splice_pos %d (%d..%d), startfrag %llu to endfrag %llu. Different strands so not shortdistance.\n",
+                            pos,min_endlength_1,querylength-min_endlength_2,
+                            (unsigned long long) Substring_genomicstart(startfrag),
+                            (unsigned long long) Substring_genomicstart(endfrag)));
+            distantsplicing = List_push(distantsplicing,
+                                        (void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
+                                                                      startfrag,endfrag,/*donor_prob*/0.0,/*acceptor_prob*/0.0,distance,
+                                                                      /*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
+                                                                      /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
+                                                                      /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
+                                                                      /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
+                                                                      /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
+                                                                      /*copy_donor_p*/true,/*copy_acceptor_p*/true,first_read_p,
+                                                                      /*sensedir*/SENSE_NULL,/*sarrayp*/false));
+            (*ndistantsplicepairs)++;
+            q = q->rest;
+          }
+          p = p->rest;
+        }
+      }
+    }
+
+    /* 6. End 5 to End 8. Different strands. */
+    /* 7. End 7 to End 6. Different strands. */
+  }
+
+  debug4l(printf("ndistantsplicepairs %d, maxchimerapaths %d\n",*ndistantsplicepairs,MAXCHIMERAPATHS));
+  if (*ndistantsplicepairs > MAXCHIMERAPATHS) {
+    /* Can afford to ignore these if MAXCHIMERAPATHS is set high enough */
+    stage3list_gc(&distantsplicing);
+    return distantsplicing_orig;
+  } else {
+    return List_append(distantsplicing_orig,distantsplicing);
+  }
 }
static List_T
-find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
- List_T *localsplicing, List_T distantsplicing_orig,
- List_T *donors_plus, List_T *antidonors_plus,
- List_T *acceptors_plus, List_T *antiacceptors_plus,
- List_T *donors_minus, List_T *antidonors_minus,
- List_T *acceptors_minus, List_T *antiacceptors_minus,
- int localsplicing_penalty, int distantsplicing_penalty,
- int querylength, int nmismatches_allowed, bool first_read_p) {
+find_splicepairs_distant_rna (int *found_score, int *ndistantsplicepairs,
+ List_T *localsplicing, List_T distantsplicing_orig,
+ List_T *donors_plus, List_T *antidonors_plus,
+ List_T *acceptors_plus, List_T *antiacceptors_plus,
+ List_T *donors_minus, List_T *antidonors_minus,
+ List_T *acceptors_minus, List_T *antiacceptors_minus,
+ int localsplicing_penalty, int distantsplicing_penalty,
+ int querylength, int nmismatches_allowed, bool first_read_p) {
List_T distantsplicing = NULL, p, q, qsave;
Substring_T donor, acceptor;
int min_endlength_1, min_endlength_2, nmismatches1, nmismatches2, pos;
@@ -10715,8 +11269,8 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
double nonidentity = 1.0 - min_distantsplicing_identity;
Chrnum_T chrnum;
- debug(printf("Starting find_splicepairs_distant with nonidentity %f\n",nonidentity));
- debug4l(printf("Starting find_splicepairs_distant with nonidentity %f\n",nonidentity));
+ debug(printf("Starting find_splicepairs_distant_rna with nonidentity %f\n",nonidentity));
+ debug4l(printf("Starting find_splicepairs_distant_rna with nonidentity %f\n",nonidentity));
if (nonidentity == 0.0) {
nmismatches_allowed = 0;
@@ -10748,7 +11302,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
/* 1. End 1 to End 2. Same strands. */
p = donors_plus[nmismatches1];
q = acceptors_plus[nmismatches2];
- debug4l(printf("find_splicepairs_known_distant (%d+%d mismatches): donors+ (%d) to acceptors+ (%d)\n",
+ debug4l(printf("find_splicepairs_distant_rna (%d+%d mismatches): donors+ (%d) to acceptors+ (%d)\n",
nmismatches1,nmismatches2,List_length(p),List_length(q)));
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
@@ -10806,7 +11360,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -10817,7 +11371,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -10840,7 +11394,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
/* 4. End 3 to End 4. Same strands. */
p = donors_minus[nmismatches1];
q = acceptors_minus[nmismatches2];
- debug4l(printf("find_splicepairs_known_distant (%d+%d mismatches): donors- (%d) to acceptors- (%d)\n",
+ debug4l(printf("find_splicepairs_distant_rna (%d+%d mismatches): donors- (%d) to acceptors- (%d)\n",
nmismatches1,nmismatches2,List_length(p),List_length(q)));
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
@@ -10897,7 +11451,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -10908,7 +11462,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -10930,7 +11484,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
/* 5. End 5 to End 6. Same strands. */
p = antidonors_plus[nmismatches1];
q = antiacceptors_plus[nmismatches2];
- debug4l(printf("find_splicepairs_known_distant (%d+%d mismatches): antidonors+ (%d) to antiacceptors+ (%d)\n",
+ debug4l(printf("find_splicepairs_distant_rna (%d+%d mismatches): antidonors+ (%d) to antiacceptors+ (%d)\n",
nmismatches1,nmismatches2,List_length(p),List_length(q)));
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
@@ -10988,7 +11542,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -10999,7 +11553,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11021,7 +11575,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
/* 8. End 7 to End 8. Same strands. */
p = antidonors_minus[nmismatches1];
q = antiacceptors_minus[nmismatches2];
- debug4l(printf("find_splicepairs_known_distant (%d+%d mismatches): antidonors- (%d) to antiacceptors- (%d)\n",
+ debug4l(printf("find_splicepairs_distant_rna (%d+%d mismatches): antidonors- (%d) to antiacceptors- (%d)\n",
nmismatches1,nmismatches2,List_length(p),List_length(q)));
while (p != NULL && q != NULL /* && *nsplicepairs <= MAXCHIMERAPATHS */) {
donor = (Substring_T) p->first;
@@ -11079,7 +11633,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
if (shortdistancep) {
*localsplicing = List_push(*localsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11090,7 +11644,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
} else if (*ndistantsplicepairs <= MAXCHIMERAPATHS) {
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11117,7 +11671,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
/* 2. End 1 to End 4. Different strands. */
p = donors_plus[nmismatches1];
q = acceptors_minus[nmismatches2];
- debug4l(printf("find_splicepairs_known_distant (%d+%d mismatches): donors+ (%d) to acceptors- (%d)\n",
+ debug4l(printf("find_splicepairs_distant_rna (%d+%d mismatches): donors+ (%d) to acceptors- (%d)\n",
nmismatches1,nmismatches2,List_length(p),List_length(q)));
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
@@ -11160,7 +11714,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11179,7 +11733,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
/* 3. End 3 to End 2. Different strands. */
p = donors_minus[nmismatches1];
q = acceptors_plus[nmismatches2];
- debug4l(printf("find_splicepairs_known_distant (%d+%d mismatches): donors- (%d) to acceptors+ (%d)\n",
+ debug4l(printf("find_splicepairs_distant_rna (%d+%d mismatches): donors- (%d) to acceptors+ (%d)\n",
nmismatches1,nmismatches2,List_length(p),List_length(q)));
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
@@ -11222,7 +11776,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11242,7 +11796,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
/* 6. End 5 to End 8. Different strands. */
p = antidonors_plus[nmismatches1];
q = antiacceptors_minus[nmismatches2];
- debug4l(printf("find_splicepairs_known_distant (%d+%d mismatches): antidonors+ (%d) to antiacceptors- (%d)\n",
+ debug4l(printf("find_splicepairs_distant_rna (%d+%d mismatches): antidonors+ (%d) to antiacceptors- (%d)\n",
nmismatches1,nmismatches2,List_length(p),List_length(q)));
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
@@ -11285,7 +11839,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11304,7 +11858,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
/* 7. End 7 to End 6. Different strands. */
p = antidonors_minus[nmismatches1];
q = antiacceptors_plus[nmismatches2];
- debug4l(printf("find_splicepairs_known_distant (%d+%d mismatches): antidonors- (%d) to antiacceptors+ (%d)\n",
+ debug4l(printf("find_splicepairs_distant_rna (%d+%d mismatches): antidonors- (%d) to antiacceptors+ (%d)\n",
nmismatches1,nmismatches2,List_length(p),List_length(q)));
while (p != NULL && q != NULL && *ndistantsplicepairs <= MAXCHIMERAPATHS) {
donor = (Substring_T) p->first;
@@ -11347,7 +11901,7 @@ find_splicepairs_distant (int *found_score, int *ndistantsplicepairs,
(unsigned long long) Substring_genomicstart(acceptor)));
distantsplicing = List_push(distantsplicing,
(void *) Stage3end_new_splice(&(*found_score),nmismatches1,nmismatches2,
- donor,acceptor,distance,
+ donor,acceptor,Substring_chimera_prob(donor),Substring_chimera_prob(acceptor),distance,
/*shortdistancep*/false,distantsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11466,7 +12020,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 1: short-overlap donor_plus: Successful ambiguous from donor #%d with amb_length %d\n",
Substring_splicesites_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,/*distance*/0U,
+ donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,amb_length,/*amb_prob*/2.0,
/*ambcoords_donor*/NULL,ambcoords,
/*ambi_donor*/NULL,/*ambi_acceptor*/splicesites_i,
@@ -11486,13 +12040,13 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
Substring_chimera_pos(donor),nmismatches_shortend,
/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
- querylength,/*plusp*/true,genestrand,first_read_p,/*sensep*/true,
+ querylength,/*plusp*/true,genestrand,first_read_p,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 1: short-overlap donor_plus: Successful splice from donor #%d to acceptor #%d\n",
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,/*distance*/bestleft-origleft,
+ donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11542,7 +12096,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 2: short-overlap acceptor_plus: Successful ambiguous from acceptor #%d with amb_length %d\n",
Substring_splicesites_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,/*distance*/0U,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,amb_length,/*amb_prob*/2.0,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
@@ -11562,13 +12116,13 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
Substring_chimera_pos(acceptor),nmismatches_shortend,
/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
- querylength,/*plusp*/true,genestrand,first_read_p,/*sensep*/true,
+ querylength,/*plusp*/true,genestrand,first_read_p,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 2: short-overlap acceptor_plus: Successful splice from acceptor #%d to donor #%d\n",
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*distance*/origleft-bestleft,
+ donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/origleft-bestleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11618,7 +12172,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 3: short-overlap donor_minus: Successful ambiguous from donor #%d with amb_length %d\n",
Substring_splicesites_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,/*distance*/0U,
+ donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,amb_length,/*amb_prob*/2.0,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
@@ -11638,13 +12192,13 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
querylength-Substring_chimera_pos(donor),nmismatches_shortend,
/*prob*/2.0,/*left*/bestleft,query_compress_rev,
- querylength,/*plusp*/false,genestrand,first_read_p,/*sensep*/true,
+ querylength,/*plusp*/false,genestrand,first_read_p,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 3: short-overlap donor_minus: Successful splice from donor #%d to acceptor #%d\n",
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,/*distance*/origleft-bestleft,
+ donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11695,7 +12249,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 4: short-overlap acceptor_minus: Successful ambiguous from acceptor #%d with amb_length %d\n",
Substring_splicesites_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,/*distance*/0U,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,amb_length,/*amb_prob*/2.0,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
@@ -11715,13 +12269,13 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
querylength-Substring_chimera_pos(acceptor),nmismatches_shortend,
/*prob*/2.0,/*left*/bestleft,query_compress_rev,
- querylength,/*plusp*/false,genestrand,first_read_p,/*sensep*/true,
+ querylength,/*plusp*/false,genestrand,first_read_p,/*sensedir*/SENSE_FORWARD,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 4: short-overlap acceptor_minus: Successful splice from acceptor #%d to #%d\n",
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*distance*/bestleft-origleft,
+ donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/bestleft-origleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11771,7 +12325,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 5: short-overlap antidonor_plus: Successful ambiguous from antidonor #%d with amb_length %d\n",
Substring_splicesites_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,/*distance*/0U,
+ donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,amb_length,/*amb_prob*/2.0,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
@@ -11791,13 +12345,13 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
Substring_chimera_pos(donor),nmismatches_shortend,
/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
- querylength,/*plusp*/true,genestrand,first_read_p,/*sensep*/false,
+ querylength,/*plusp*/true,genestrand,first_read_p,/*sensedir*/SENSE_ANTI,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 5: short-overlap antidonor_plus: Successful splice from antidonor #%d to antiacceptor #%d\n",
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,/*distance*/origleft-bestleft,
+ donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/origleft-bestleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11848,7 +12402,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful ambiguous from antiacceptor #%d with amb_length %d\n",
Substring_splicesites_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,/*distance*/0U,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,amb_length,/*amb_prob*/2.0,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
@@ -11868,13 +12422,13 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
Substring_chimera_pos(acceptor),nmismatches_shortend,
/*prob*/2.0,/*left*/bestleft,query_compress_fwd,
- querylength,/*plusp*/true,genestrand,first_read_p,/*sensep*/false,
+ querylength,/*plusp*/true,genestrand,first_read_p,/*sensedir*/SENSE_ANTI,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 6: short-overlap antiacceptor_plus: Successful splice from antiacceptor #%d to antidonor #%d\n",
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*distance*/bestleft-origleft,
+ donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/bestleft-origleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -11925,7 +12479,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 7: short-overlap antidonor_minus: Successful ambiguous from antidonor #%d with amb_length %d\n",
Substring_splicesites_knowni(donor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,/*acceptor*/NULL,/*distance*/0U,
+ donor,/*acceptor*/NULL,Substring_chimera_prob(donor),Doublelist_max(probs_list),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,amb_length,/*amb_prob*/2.0,
/*ambcoords_donor*/NULL,ambcoords,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/splicesites_i,
@@ -11945,13 +12499,13 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if ((acceptor = Substring_new_acceptor(/*acceptor_coord*/splicesites[bestj],/*acceptor_knowni*/bestj,
querylength-Substring_chimera_pos(donor),nmismatches_shortend,
/*prob*/2.0,/*left*/bestleft,query_compress_rev,
- querylength,/*plusp*/false,genestrand,first_read_p,/*sensep*/false,
+ querylength,/*plusp*/false,genestrand,first_read_p,/*sensedir*/SENSE_ANTI,
Substring_chrnum(donor),Substring_chroffset(donor),
Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
debug4h(printf("End 7: short-overlap antidonor_minus: Successful splice from antidonor #%d to antiacceptor #%d\n",
Substring_splicesites_knowni(donor),Substring_splicesites_knowni(acceptor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches,nmismatches_shortend,
- donor,acceptor,/*distance*/bestleft-origleft,
+ donor,acceptor,Substring_chimera_prob(donor),/*acceptor_prob*/2.0,/*distance*/bestleft-origleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -12001,7 +12555,7 @@ find_splicepairs_shortend (int *found_score, List_T hits,
debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful ambiguous from antiacceptor #%d with amb_length %d\n",
Substring_splicesites_knowni(acceptor),amb_length));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- /*donor*/NULL,acceptor,/*distance*/0U,
+ /*donor*/NULL,acceptor,Doublelist_max(probs_list),Substring_chimera_prob(acceptor),/*distance*/0U,
/*shortdistancep*/false,/*penalty*/0,querylength,amb_length,/*amb_prob*/2.0,
ambcoords,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/splicesites_i,/*amb_knowni_acceptor*/NULL,
@@ -12021,13 +12575,13 @@ find_splicepairs_shortend (int *found_score, List_T hits,
if ((donor = Substring_new_donor(/*donor_coord*/splicesites[bestj],/*donor_knowni*/bestj,
querylength-Substring_chimera_pos(acceptor),nmismatches_shortend,
/*prob*/2.0,/*left*/bestleft,query_compress_rev,
- querylength,/*plusp*/false,genestrand,first_read_p,/*sensep*/false,
+ querylength,/*plusp*/false,genestrand,first_read_p,/*sensedir*/SENSE_ANTI,
Substring_chrnum(acceptor),Substring_chroffset(acceptor),
Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
debug4h(printf("End 8: short-overlap antiacceptor_minus: Successful splice from antiacceptor #%d to antidonor #%d\n",
Substring_splicesites_knowni(acceptor),Substring_splicesites_knowni(donor)));
hits = List_push(hits,(void *) Stage3end_new_splice(&(*found_score),nmismatches_shortend,nmismatches,
- donor,acceptor,/*distance*/origleft-bestleft,
+ donor,acceptor,/*donor_prob*/2.0,Substring_chimera_prob(acceptor),/*distance*/origleft-bestleft,
/*shortdistancep*/true,localsplicing_penalty,querylength,/*amb_length*/0,/*amb_prob*/0.0,
/*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
/*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
@@ -12176,6 +12730,7 @@ compute_floors (bool *any_omitted_p, bool *alloc_floors_p, Floors_T *floors_arra
static void
complete_set_mm_indels (int *found_score, bool *segments_computed_p,
+ List_T *plus_anchor_segments, List_T *minus_anchor_segments,
int *opt_level, int *done_level, int user_maxlevel,
bool revise_levels_p, int *nhits, List_T *subs, List_T *indels, T this,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
@@ -12193,6 +12748,7 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
#endif
debug(printf("Starting complete_set_mm_indels with found_score %d\n",*found_score));
+
this->plus_segments = NULL;
this->minus_segments = NULL;
@@ -12222,33 +12778,36 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
debug(printf("*** Stage 5. Complete set mismatches up to %d (done_level %d, fast_level %d) ***\n",
max_mismatches_allowed,*done_level,fast_level));
- if (max_mismatches_allowed >= 0) {
- this->plus_segments = identify_all_segments(&this->plus_nsegments,&this->plus_spliceable,&this->plus_nspliceable,
+ if (1 || max_mismatches_allowed >= 0) {
+ this->plus_segments = identify_all_segments(&this->plus_nsegments,&(*plus_anchor_segments),
+ &this->plus_spliceable,&this->plus_nspliceable,
#ifdef LARGE_GENOMES
this->plus_positions_high,this->plus_positions_low,
#else
this->plus_positions,
#endif
this->plus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/true);
- this->minus_segments = identify_all_segments(&this->minus_nsegments,&this->minus_spliceable,&this->minus_nspliceable,
+ /*max_mismatches_allowed*/*done_level,/*plusp*/true);
+ this->minus_segments = identify_all_segments(&this->minus_nsegments,&(*minus_anchor_segments),
+ &this->minus_spliceable,&this->minus_nspliceable,
#ifdef LARGE_GENOMES
this->minus_positions_high,this->minus_positions_low,
#else
this->minus_positions,
#endif
this->minus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/false);
+ /*max_mismatches_allowed*/*done_level,/*plusp*/false);
- *subs = find_complete_mm(&(*found_score),&(*nhits),*subs,this->plus_segments,this->plus_nsegments,
+ *subs = find_complete_mm(&(*found_score),&(*nhits),*subs,*plus_anchor_segments,
querylength,/*queryptr:queryuc_ptr,*/
/*query_compress*/query_compress_fwd,
- max_mismatches_allowed,/*plusp*/true,genestrand,first_read_p);
+ /*max_mismatches_allowed*/*done_level,/*plusp*/true,genestrand,first_read_p);
- *subs = find_complete_mm(&(*found_score),&(*nhits),*subs,this->minus_segments,this->minus_nsegments,
+ *subs = find_complete_mm(&(*found_score),&(*nhits),*subs,*minus_anchor_segments,
querylength,/*queryptr:queryrc,*/
/*query_compress*/query_compress_rev,
- max_mismatches_allowed,/*plusp*/false,genestrand,first_read_p);
+ /*max_mismatches_allowed*/*done_level,/*plusp*/false,genestrand,first_read_p);
+
*segments_computed_p = true;
debug(printf("5> found_score = %d, opt_level %d, done_level %d\n",*found_score,*opt_level,*done_level));
@@ -12262,27 +12821,37 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
}
}
+#if 0
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ debug(printf("10> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+#endif
+
if (*done_level >= indel_penalty_middle || *done_level >= indel_penalty_end) {
/* 6. Indels */
/* Need to reverse, because middle indelsplicing procedure depends on ascending diagonal order */
if (*segments_computed_p == false) {
- this->plus_segments = identify_all_segments(&this->plus_nsegments,&this->plus_spliceable,&this->plus_nspliceable,
+ this->plus_segments = identify_all_segments(&this->plus_nsegments,&(*plus_anchor_segments),
+ &this->plus_spliceable,&this->plus_nspliceable,
#ifdef LARGE_GENOMES
this->plus_positions_high,this->plus_positions_low,
#else
this->plus_positions,
#endif
this->plus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/true);
- this->minus_segments = identify_all_segments(&this->minus_nsegments,&this->minus_spliceable,&this->minus_nspliceable,
+ /*max_mismatches_allowed*/*done_level,/*plusp*/true);
+ this->minus_segments = identify_all_segments(&this->minus_nsegments,&(*minus_anchor_segments),
+ &this->minus_spliceable,&this->minus_nspliceable,
#ifdef LARGE_GENOMES
this->minus_positions_high,this->minus_positions_low,
#else
this->minus_positions,
#endif
this->minus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/false);
+ /*max_mismatches_allowed*/*done_level,/*plusp*/false);
*segments_computed_p = true;
}
@@ -12297,17 +12866,16 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
#ifdef DEBUG2
queryuc_ptr,queryrc,
#endif
- floors,querylength,query_lastpos,query_compress_fwd,query_compress_rev,
+ floors,querylength,query_lastpos,query_compress_fwd,query_compress_rev,genome,
/*indel_mismatches_allowed*/indel_level - indel_penalty,genestrand,first_read_p);
if (allow_end_indels_p == true) {
debug(printf("*** Stage 6. End indels with %d-%d mismatches allowed\n",indel_level,indel_penalty));
- *indels = find_end_indels(&(*found_score),&(*nhits),*indels,this->plus_segments,this->minus_segments,
- this->plus_nsegments,this->minus_nsegments,
+ *indels = find_end_indels(&(*found_score),&(*nhits),*indels,*plus_anchor_segments,*minus_anchor_segments,
#ifdef DEBUG2E
queryuc_ptr,queryrc,
#endif
- querylength,firstbound,lastbound,query_compress_fwd,query_compress_rev,
+ querylength,firstbound,lastbound,query_compress_fwd,query_compress_rev,genome,
max_end_insertions,max_end_deletions,min_indel_end_matches,indel_penalty,
/*indel_mismatches_allowed*/indel_level - indel_penalty,genestrand,first_read_p);
}
@@ -12322,7 +12890,7 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
}
#else
/* Do all in one sweep */
- debug(printf("*** Stage 6A. Middle indels with %d-%d mismatches allowed, found_score = %d\n",
+ debug(printf("*** Stage 6 (middle). Middle indels with %d-%d mismatches allowed, found_score = %d\n",
*done_level,indel_penalty_middle,*found_score));
*indels = find_middle_indels(&(*found_score),&(*nhits),*indels,
this->plus_spliceable,this->plus_nspliceable,
@@ -12338,13 +12906,12 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
*done_level = user_maxlevel;
}
}
- debug(printf("6A> found_score = %d, opt_level %d, done_level %d\n",*found_score,*opt_level,*done_level));
+ debug(printf("6 (middle)> found_score = %d, opt_level %d, done_level %d\n",*found_score,*opt_level,*done_level));
if (allow_end_indels_p == true) {
- debug(printf("*** Stage 6B. End indels with %d-%d mismatches allowed, found_score = %d\n",
+ debug(printf("*** Stage 6 (end). End indels with %d-%d mismatches allowed, found_score = %d\n",
*done_level,indel_penalty_end,*found_score));
- *indels = find_end_indels(&(*found_score),&(*nhits),*indels,this->plus_segments,this->minus_segments,
- this->plus_nsegments,this->minus_nsegments,
+ *indels = find_end_indels(&(*found_score),&(*nhits),*indels,*plus_anchor_segments,*minus_anchor_segments,
#ifdef DEBUG2E
queryuc_ptr,queryrc,
#endif
@@ -12357,7 +12924,7 @@ complete_set_mm_indels (int *found_score, bool *segments_computed_p,
*done_level = user_maxlevel;
}
}
- debug(printf("6B> found_score = %d, opt_level %d, done_level %d\n",*found_score,*opt_level,*done_level));
+ debug(printf("6 (end)> found_score = %d, opt_level %d, done_level %d\n",*found_score,*opt_level,*done_level));
}
/* Calling procedure will invoke Stage3_remove_duplicates */
#endif
@@ -12464,7 +13031,7 @@ complete_set_doublesplicing (int *found_score, List_T localsplicing_orig, Floors
localsplicing_plus = find_doublesplices(&(*found_score),/*localsplicing*/NULL,&lowprob,
this->plus_spliceable,this->plus_nspliceable,this->plus_segments,
- /*queryptr*/queryuc_ptr,floors,querylength,query_lastpos,
+ /*queryptr*/queryuc_ptr,querylength,query_lastpos,
/*query_compress*/query_compress_fwd,
/*max_distance*/shortsplicedist,/*splicing_penalty*/localsplicing_penalty,
min_shortend,max_mismatches_allowed,pairedp,first_read_p,
@@ -12472,7 +13039,7 @@ complete_set_doublesplicing (int *found_score, List_T localsplicing_orig, Floors
localsplicing_minus = find_doublesplices(&(*found_score),/*localsplicing*/NULL,&lowprob,
this->minus_spliceable,this->minus_nspliceable,this->minus_segments,
- /*queryptr*/queryrc,floors,querylength,query_lastpos,
+ /*queryptr*/queryrc,querylength,query_lastpos,
/*query_compress*/query_compress_rev,
/*max_distance*/shortsplicedist,/*splicing_penalty*/localsplicing_penalty,
min_shortend,max_mismatches_allowed,pairedp,first_read_p,
@@ -12567,24 +13134,25 @@ History_put (History_T this, Univinterval_T interval, List_T gmap_hits) {
}
+/* Also defined in sarray-read.c */
#define add_bounded(x,plusterm,highbound) ((x + (plusterm) >= highbound) ? (highbound - 1) : x + (plusterm))
#define subtract_bounded(x,minusterm,lowbound) ((x < lowbound + (minusterm)) ? lowbound : x - (minusterm))
static List_T
-run_gmap (bool *good_start_p, bool *good_end_p, History_T gmap_history,
- List_T hits, char *queryuc_ptr, int querylength,
- int sense_try, bool favor_right_p, int paired_favor_mode, int zero_offset,
- Compress_T query_compress_fwd, Compress_T query_compress_rev,
-
- Univcoord_T mappingstart, Univcoord_T mappingend,
- Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
- bool watsonp, int genestrand, bool first_read_p,
- Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength,
-
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, int user_maxlevel) {
+run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_history,
+ List_T hits, char *accession, char *queryuc_ptr, int querylength,
+ int sense_try, bool favor_right_p, int paired_favor_mode, int zero_offset,
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+
+ Univcoord_T mappingstart, Univcoord_T mappingend,
+ Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
+ bool watsonp, int genestrand, bool first_read_p,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength,
+
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, int user_maxlevel) {
Stage3end_T hit;
#ifdef EXTRACT_GENOMICSEG
char *genomicseg, *genomicseg_alloc;
@@ -12610,275 +13178,305 @@ run_gmap (bool *good_start_p, bool *good_end_p, History_T gmap_history,
int ncanonical, nsemicanonical, nnoncanonical;
int maxintronlen_bound;
+
debug13(printf("Running GMAP at mappingstart %u + %d = mappingend %u, watsonp %d, sense_try %d, querylength %d, limits %u..%u\n",
(Chrpos_T) (mappingstart-chroffset),mappingend-mappingstart,
(Chrpos_T) (mappingend-chroffset),watsonp,sense_try,querylength,
(Chrpos_T) (knownsplice_limit_low-chroffset),(Chrpos_T) (knownsplice_limit_high-chroffset)));
- assert(mappingend > mappingstart);
-
*good_start_p = *good_end_p = false;
- interval = Univinterval_new(mappingstart,mappingend,sense_try);
- debug13(printf("Checking history for interval at %u..%u (sense_try %d)\n",
- mappingstart,mappingend,sense_try));
- if ((stored_hits = History_get(gmap_history,interval)) != NULL) {
- debug13(printf("Already ran these coordinates, and have results\n"));
- for (p = stored_hits; p != NULL; p = List_next(p)) {
- if ((hit = (Stage3end_T) List_head(p)) != NULL) {
- if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
- *good_start_p = true;
- }
- if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
- *good_end_p = true;
+ /* It is possible for mappingend to equal mappingstart if the read
+ is forced to the beginning or end of a chromosome */
+ if (mappingend > mappingstart) {
+ interval = Univinterval_new(mappingstart,mappingend,sense_try);
+ debug13(printf("Checking history for interval at %u..%u (sense_try %d)\n",
+ mappingstart,mappingend,sense_try));
+ if ((stored_hits = History_get(gmap_history,interval)) != NULL) {
+ debug13(printf("Already ran these coordinates, and have results\n"));
+ for (p = stored_hits; p != NULL; p = List_next(p)) {
+ if ((hit = (Stage3end_T) List_head(p)) != NULL) {
+ if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+ *good_start_p = true;
+ }
+ if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+ *good_end_p = true;
+ }
+ hits = List_push(hits,(void *) Stage3end_copy(hit));
}
- hits = List_push(hits,(void *) Stage3end_copy(hit));
}
+ Univinterval_free(&interval);
+ return hits;
+ } else {
+ debug13(printf("New coordinates\n"));
+ /* stored_hits = (List_T) NULL; -- Already NULL */
}
- Univinterval_free(&interval);
- return hits;
- } else {
- debug13(printf("New coordinates\n"));
- /* stored_hits = (List_T) NULL; -- Already NULL */
- }
#ifdef EXTRACT_GENOMICSEG
- if (watsonp == true) {
- printf("Allocating %u bytes\n",genomiclength);
- genomicseg_alloc = (char *) CALLOC(genomiclength+MAX_INDEXSIZE+1,sizeof(char));
- genomicseg = &(genomicseg_alloc[MAX_INDEXSIZE]);
- Genome_fill_buffer_blocks(genomicstart-MAX_INDEXSIZE,genomiclength+MAX_INDEXSIZE,genomicseg_alloc);
- } else {
- printf("Allocating %u bytes\n",genomiclength);
- genomicseg_alloc = (char *) CALLOC(genomiclength+MAX_INDEXSIZE+1,sizeof(char));
- genomicseg = &(genomicseg_alloc[MAX_INDEXSIZE]);
- Genome_fill_buffer_blocks(genomicstart,genomiclength+MAX_INDEXSIZE,genomicseg_alloc);
- make_complement_inplace(genomicseg_alloc,genomiclength+MAX_INDEXSIZE);
- }
+ if (watsonp == true) {
+ printf("Allocating %u bytes\n",genomiclength);
+ genomicseg_alloc = (char *) CALLOC(genomiclength+MAX_INDEXSIZE+1,sizeof(char));
+ genomicseg = &(genomicseg_alloc[MAX_INDEXSIZE]);
+ Genome_fill_buffer_blocks(genomicstart-MAX_INDEXSIZE,genomiclength+MAX_INDEXSIZE,genomicseg_alloc);
+ } else {
+ printf("Allocating %u bytes\n",genomiclength);
+ genomicseg_alloc = (char *) CALLOC(genomiclength+MAX_INDEXSIZE+1,sizeof(char));
+ genomicseg = &(genomicseg_alloc[MAX_INDEXSIZE]);
+ Genome_fill_buffer_blocks(genomicstart,genomiclength+MAX_INDEXSIZE,genomicseg_alloc);
+ make_complement_inplace(genomicseg_alloc,genomiclength+MAX_INDEXSIZE);
+ }
#endif
- if (chroffset + chrlength < chrhigh) {
- debug13(printf("Chromosome is circular because chroffset %u + chrlength %u < chrhigh %u\n",
- chroffset,chrlength,chrhigh));
- maxintronlen_bound = 0;
- } else {
+#if 0
+ /* Should be able to have splicing on a circular chromosome */
+ if (chroffset + chrlength < chrhigh) {
+ debug13(printf("Chromosome is circular because chroffset %u + chrlength %u < chrhigh %u\n",
+ chroffset,chrlength,chrhigh));
+ maxintronlen_bound = 0;
+ } else {
+ maxintronlen_bound = shortsplicedist;
+ }
+#else
maxintronlen_bound = shortsplicedist;
- }
+#endif
+
- /* Note: Use nmatches post-trim to decide if the alignment is high
- quality or worth keeping. But if so, then use nmatches_pretrim
- for ranking and scoring purposes. */
+ /* Note: Use nmatches post-trim to decide if the alignment is high
+ quality or worth keeping. But if so, then use nmatches_pretrim
+ for ranking and scoring purposes. */
- /* use_shifted_canonical_p == true can be slow and can give wrong answers */
- all_stage2results = Stage2_compute(&stage2_source,&stage2_indexsize,
- /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*query_offset*/0,
-
- /*chrstart*/mappingstart-chroffset,/*chrend*/mappingend-chroffset,
- chroffset,chrhigh,/*plusp*/watsonp,genestrand,
+ /* use_shifted_canonical_p == true can be slow and can give wrong answers */
+ all_stage2results = Stage2_compute(&stage2_source,&stage2_indexsize,
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*query_offset*/0,
+ /*chrstart*/mappingstart-chroffset,/*chrend*/mappingend-chroffset,
+ chroffset,chrhigh,/*plusp*/watsonp,genestrand,
- oligoindices_major,/*proceed_pctcoverage*/0.5,
- pairpool,diagpool,cellpool,sufflookback,nsufflookback,
- maxintronlen_bound,/*localp*/true,
- /*skip_repetitive_p*/true,favor_right_p,/*max_nalignments*/MAX_NALIGNMENTS,
- /*debug_graphic_p*/false,/*diagnosticp*/false,
- /*worker_stopwatch*/NULL,/*diag_debug*/false);
- debug13(printf("Got %d stage2 results\n",List_length(all_stage2results)));
-
- if (all_stage2results == NULL) {
- stored_hits = List_push(stored_hits,(void *) NULL);
- }
-
- for (p = all_stage2results; p != NULL; p = List_next(p)) {
- stage2 = (Stage2_T) List_head(p);
- if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
- &matches,&nmatches_posttrim,&max_match_length,
- &ambig_end_length_5,&ambig_end_length_3,
- &ambig_splicetype_5,&ambig_splicetype_3,
- &ambig_prob_5,&ambig_prob_3,
- &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
- &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,stage2,
+ oligoindices_major,/*proceed_pctcoverage*/0.5,
+ pairpool,diagpool,cellpool,/*localp*/true,
+ /*skip_repetitive_p*/true,favor_right_p,/*max_nalignments*/MAX_NALIGNMENTS,
+ /*debug_graphic_p*/false,/*worker_stopwatch*/NULL,/*diag_debug*/false);
+
+ debug13(printf("Got %d stage2 results\n",List_length(all_stage2results)));
+
+ if (all_stage2results == NULL) {
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ }
+
+ for (p = all_stage2results; p != NULL; p = List_next(p)) {
+ stage2 = (Stage2_T) List_head(p);
+ if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
+ &matches,&nmatches_posttrim,&max_match_length,
+ &ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,
+ &ambig_prob_5,&ambig_prob_3,
+ &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
+ &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
+ Stage2_middle(stage2),Stage2_all_starts(stage2),Stage2_all_ends(stage2),
#ifdef END_KNOWNSPLICING_SHORTCUT
- cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
- watsonp ? query_compress_fwd : query_compress_rev,
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
#endif
- /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
#ifdef EXTRACT_GENOMICSEG
- /*query_subseq_offset*/0,
+ /*query_subseq_offset*/0,
#else
- /*query_subseq_offset*/0,
-#endif
- chrnum,chroffset,chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,watsonp,genestrand,
- /*jump_late_p*/watsonp ? false : true,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extramaterial_paired,
- extraband_single,extraband_end,extraband_paired,
- minendexon,pairpool,dynprogL,dynprogM,dynprogR,ngap,
- /*diagnosticp*/false,/*checkp*/false,
- /*do_final_p*/true,sense_try,/*sense_filter*/0,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,
- /*close_indels_mode*/+1,paired_favor_mode,zero_offset)) == NULL) {
- debug13(printf("stage3 is NULL\n"));
- stored_hits = List_push(stored_hits,(void *) NULL);
-
- } else {
- debug13(printf("stage3 is not NULL\n"));
-
- debug13a(Pair_dump_array(pairarray,npairs,true));
+ /*query_subseq_offset*/0,
+#endif
+ chrnum,chroffset,chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,watsonp,genestrand,
+ /*jump_late_p*/watsonp ? false : true,
- if (0 && Stage3_short_alignment_p(pairarray,npairs,querylength) == true) {
- /* Very bad alignment */
- debug13(printf("Very bad alignment\n"));
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ sense_try,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
+ debug13(printf("stage3 is NULL\n"));
stored_hits = List_push(stored_hits,(void *) NULL);
- FREE_OUT(pairarray);
} else {
-#if 0
- Pair_print_gsnap(stdout,pairarray,npairs,invertedp,chrnum,chroffset,chrhigh,
- querylength,watsonp,cdna_direction,chromosome_iit);
-#endif
+ debug13(printf("stage3 is not NULL\n"));
- nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
- pairarray,npairs);
- if (watsonp == true) {
- start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
- /*minusterm*/Pair_querypos(&(pairarray[0])),chroffset);
- end = add_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chrhigh);
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- ambig_prob_5,ambig_prob_3,min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
- /*left*/start,/*genomiclength*/end - start + 1,
- /*plusp*/watsonp,genestrand,first_read_p,
- querylength,chrnum,chroffset,chrhigh,chrlength,
- cdna_direction,sensedir)) == NULL) {
- debug13(printf("Stage3end_new_gmap returns NULL\n"));
- stored_hits = List_push(stored_hits,(void *) NULL);
- FREE_OUT(pairarray);
+ debug13a(Pair_dump_array(pairarray,npairs,true));
+
+ if (0 && Stage3_short_alignment_p(pairarray,npairs,querylength) == true) {
+ /* Very bad alignment */
+ debug13(printf("Very bad alignment\n"));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ FREE_OUT(pairarray);
+
+ } else {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray,npairs);
+ if (watsonp == true) {
+ start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
+ /*minusterm*/Pair_querypos(&(pairarray[0])),chroffset);
+ end = add_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chrhigh);
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
+ ambig_end_length_5,ambig_end_length_3,
+ ambig_splicetype_5,ambig_splicetype_3,
+ ambig_prob_5,ambig_prob_3,min_splice_prob,
+ pairarray,npairs,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/watsonp,genestrand,first_read_p,
+ accession,querylength,chrnum,chroffset,chrhigh,chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ debug13(printf("Stage3end_new_gmap returns NULL\n"));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ FREE_OUT(pairarray);
#if 0
- } else if (Stage3end_bad_stretch_p(hit,query_compress_fwd,query_compress_rev) == true) {
- debug13(printf("Stage3end_new_gmap has a bad stretch\n"));
- Stage3end_free(&hit);
- stored_hits = List_push(stored_hits,(void *) NULL);
- /* FREE_OUT(pairarray); */
+ } else if (Stage3end_bad_stretch_p(hit,query_compress_fwd,query_compress_rev) == true) {
+ debug13(printf("Stage3end_new_gmap has a bad stretch\n"));
+ Stage3end_free(&hit);
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ /* FREE_OUT(pairarray); */
#endif
- } else {
- if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
- *good_start_p = true;
- }
- if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
- *good_end_p = true;
- }
- debug13(printf("Trim at start: %d, trim at end: %d\n",
- Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
- if (terminal_threshold > user_maxlevel &&
- (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
- debug13(printf("terminal_threshold %d > user_maxlevel %d, so freeing this GMAP hit\n",
- terminal_threshold,user_maxlevel));
- stored_hits = List_push(stored_hits,(void *) NULL);
- Stage3end_free(&hit);
} else {
- stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
- hits = List_push(hits,(void *) hit);
+ if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+ *good_start_p = true;
+ }
+ if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+ *good_end_p = true;
+ }
+ debug13(printf("Trim at start: %d, trim at end: %d\n",
+ Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
+ /* Don't throw away GMAP hits */
+ if (0 && terminal_threshold > user_maxlevel &&
+ (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
+ debug13(printf("terminal_threshold %d > user_maxlevel %d, so freeing this GMAP hit\n",
+ terminal_threshold,user_maxlevel));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ Stage3end_free(&hit);
+ } else {
+ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
+ hits = List_push(hits,(void *) hit);
+ }
}
- }
- } else {
- start = add_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
- /*plusterm*/Pair_querypos(&(pairarray[0])),chrhigh);
- end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
- /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chroffset);
- if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
- ambig_end_length_5,ambig_end_length_3,
- ambig_splicetype_5,ambig_splicetype_3,
- ambig_prob_5,ambig_prob_3,min_splice_prob,
- pairarray,npairs,nsegments,nintrons,nindelbreaks,
- /*left*/end,/*genomiclength*/start - end + 1,
- /*plusp*/watsonp,genestrand,first_read_p,
- querylength,chrnum,chroffset,chrhigh,chrlength,
- cdna_direction,sensedir)) == NULL) {
- debug13(printf("Stage3end_new_gmap returns NULL\n"));
- stored_hits = List_push(stored_hits,(void *) NULL);
- FREE_OUT(pairarray);
+ } else {
+ start = add_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
+ /*plusterm*/Pair_querypos(&(pairarray[0])),chrhigh);
+ end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chroffset);
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
+ ambig_end_length_5,ambig_end_length_3,
+ ambig_splicetype_5,ambig_splicetype_3,
+ ambig_prob_5,ambig_prob_3,min_splice_prob,
+ pairarray,npairs,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/watsonp,genestrand,first_read_p,
+ accession,querylength,chrnum,chroffset,chrhigh,chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_REGION)) == NULL) {
+ debug13(printf("Stage3end_new_gmap returns NULL\n"));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ FREE_OUT(pairarray);
#if 0
- } else if (Stage3end_bad_stretch_p(hit,query_compress_fwd,query_compress_rev) == true) {
- debug13(printf("Stage3end_new_gmap has a bad stretch\n"));
- stored_hits = List_push(stored_hits,(void *) NULL);
- Stage3end_free(&hit);
- /* FREE_OUT(pairarray); */
-#endif
-
- } else {
- if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
- *good_start_p = true;
- }
- if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
- *good_end_p = true;
- }
- debug13(printf("Trim at start: %d, trim at end: %d (raw %d and %d)\n",
- Stage3end_trim_right(hit),Stage3end_trim_left(hit),
- Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
- if (terminal_threshold > user_maxlevel &&
- (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
- debug13(printf("terminal_threshold %d > user_maxlevel %d, so freeing this GMAP hit\n",
- terminal_threshold,user_maxlevel));
+ } else if (Stage3end_bad_stretch_p(hit,query_compress_fwd,query_compress_rev) == true) {
+ debug13(printf("Stage3end_new_gmap has a bad stretch\n"));
stored_hits = List_push(stored_hits,(void *) NULL);
Stage3end_free(&hit);
+ /* FREE_OUT(pairarray); */
+#endif
+
} else {
- stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
- hits = List_push(hits,(void *) hit);
+ if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+ *good_start_p = true;
+ }
+ if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+ *good_end_p = true;
+ }
+ debug13(printf("Trim at start: %d, trim at end: %d (raw %d and %d)\n",
+ Stage3end_trim_right(hit),Stage3end_trim_left(hit),
+ Stage3end_trim_right_raw(hit),Stage3end_trim_left_raw(hit)));
+ /* Don't throw away GMAP hits */
+ if (0 && terminal_threshold > user_maxlevel &&
+ (Stage3end_trim_left_raw(hit) >= GOOD_GMAP_END || Stage3end_trim_right_raw(hit) >= GOOD_GMAP_END)) {
+ debug13(printf("terminal_threshold %d > user_maxlevel %d, so freeing this GMAP hit\n",
+ terminal_threshold,user_maxlevel));
+ stored_hits = List_push(stored_hits,(void *) NULL);
+ Stage3end_free(&hit);
+ } else {
+ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
+ hits = List_push(hits,(void *) hit);
+ }
}
}
+ /* Don't free pairarray */
}
- /* Don't free pairarray */
}
- }
- Stage2_free(&stage2);
- }
- List_free(&all_stage2results);
+ Stage2_free(&stage2);
+ }
+ List_free(&all_stage2results);
#ifdef EXTRACT_GENOMICSEG
- FREE(genomicseg_alloc);
+ FREE(genomicseg_alloc);
#endif
- debug13(printf(" => Got good_start_p %d, good_end_p %d\n",*good_start_p,*good_end_p));
- debug13(printf("Storing history for interval at %u..%u (sense_try %d)\n",
- mappingstart,mappingend,sense_try));
- History_put(gmap_history,interval,stored_hits);
+ debug13(printf(" => Got good_start_p %d, good_end_p %d\n",*good_start_p,*good_end_p));
+ debug13(printf("Storing history for interval at %u..%u (sense_try %d)\n",
+ mappingstart,mappingend,sense_try));
+ History_put(gmap_history,interval,stored_hits);
+ }
+
return hits;
}
-static List_T
-align_single_hit_with_gmap (History_T gmap_history, Stage3end_T hit,
- bool extend_left_p, bool extend_right_p,
- char *queryuc_ptr, int querylength, int query_lastpos,
+static Stage3end_T
+align_single_hit_with_gmap (Stage3end_T hit, char *queryuc_ptr, int querylength,
#ifdef END_KNOWNSPLICING_SHORTCUT
char *queryrc, bool invertedp,
#endif
- Compress_T query_compress_fwd, Compress_T query_compress_rev,
- struct Segment_T *plus_segments, int plus_nsegments,
- struct Segment_T *minus_segments, int minus_nsegments,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Oligoindex_array_T oligoindices_minor,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int user_maxlevel, int genestrand, bool first_read_p) {
- List_T hits = NULL;
+ int genestrand, bool first_read_p) {
+ /* Both events are tested by Stage3end_anomalous_splice_p */
+ if (Stage3end_chrnum(hit) == 0) {
+ /* Translocation */
+ return (Stage3end_T) NULL;
+
+ } else if (Stage3end_hittype(hit) == SAMECHR_SPLICE) {
+ /* A genomic event that doesn't get reflected in chrnum */
+ return (Stage3end_T) NULL;
- Univcoord_T segmentstart, segmentend;
+ } else if (Stage3end_hittype(hit) == GMAP) {
+ return (Stage3end_T) NULL;
+
+ } else if (Stage3end_plusp(hit) == true) {
+ return Stage3end_substrings_run_gmap_plus(hit,queryuc_ptr,querylength,genestrand,first_read_p,
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool);
+ } else {
+ return Stage3end_substrings_run_gmap_minus(hit,queryuc_ptr,querylength,genestrand,first_read_p,
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_minor,diagpool,cellpool);
+ }
+}
+
+
+#if 0
+static List_T
+convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
+ char *accession, char *queryuc_ptr, int querylength, int query_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ char *queryrc, bool invertedp,
+#endif
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ List_T anchor_segments, struct Segment_T *plus_segments, int plus_nsegments,
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ int user_maxlevel, int genestrand, bool first_read_p,
+ bool require_pairing_p) {
+ Univcoord_T segmentstart, segmentend, left;
Univcoord_T mappingstart, mappingend, chroffset, chrhigh, mappingpos;
Univcoord_T origlow, orighigh;
- Univcoord_T close_mappingstart_greedy, close_mappingend_greedy,
- close_mappingstart_last, close_mappingend_last;
- Univcoord_T middle_mappingstart_greedy, middle_mappingend_greedy,
+ Univcoord_T close_mappingstart_last, close_mappingend_last,
middle_mappingstart_last, middle_mappingend_last;
Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
Univcoord_T close_knownsplice_limit_low, close_knownsplice_limit_high;
@@ -12887,2382 +13485,3809 @@ align_single_hit_with_gmap (History_T gmap_history, Stage3end_T hit,
bool close_mappingstart_p = false, close_mappingend_p = false;
bool middle_mappingstart_p = false, middle_mappingend_p = false;
bool fallback_mappingstart_p, fallback_mappingend_p;
- bool good_start_p, good_end_p, watsonp, favor_right_p;
-
- int starti, endi, i;
-
-
- /* Both events are tested by Stage3end_anomalous_splice_p */
- if ((chrnum = Stage3end_chrnum(hit)) == 0) {
- /* Translocation */
- return (List_T) NULL;
-
- } else if (Stage3end_hittype(hit) == SAMECHR_SPLICE) {
- /* A genomic event that doesn't get reflected in chrnum */
- return (List_T) NULL;
-
- } else {
- chroffset = Stage3end_chroffset(hit);
- chrhigh = Stage3end_chrhigh(hit);
- chrlength = Stage3end_chrlength(hit);
- }
+ bool good_start_p, good_end_p, favor_right_p = false;
+ bool novelp; /* Want any of the segments in (startk+1)..(endk-1) to not be used */
+ bool pairablep; /* Want any of the segments in (startk+1)..(endk-1) to be pairable */
- if ((watsonp = Stage3end_plusp(hit)) == true) {
- origlow = Stage3end_genomicstart(hit);
- orighigh = Stage3end_genomicend(hit);
-
- if (extend_left_p == true) {
- knownsplice_limit_low = subtract_bounded(origlow,shortsplicedist,chroffset);
-#ifdef LONG_ENDSPLICES
- mappingstart = segmentstart = subtract_bounded(origlow,shortsplicedist,chroffset);
-#else
- mappingstart = segmentstart = subtract_bounded(origlow,shortsplicedist_novelend,chroffset);
-#endif
+ List_T p;
+ Segment_T anchor_segment;
+ int anchork, startk, endk, k;
- } else {
- knownsplice_limit_low = mappingstart = segmentstart = origlow;
+ anchork = 0;
+ for (p = anchor_segments; p != NULL; p = List_next(p)) {
+ anchor_segment = (Segment_T) List_head(p);
+ assert(anchor_segment->diagonal != (Univcoord_T) -1);
+ while (plus_segments[anchork].diagonal != anchor_segment->diagonal) {
+ anchork++;
}
- debug13(printf("Original bounds A: knownsplice_limit_low %u, mappingstart %u\n",
- knownsplice_limit_low - chroffset,mappingstart - chroffset));
-
- if (extend_right_p == true) {
- knownsplice_limit_high = add_bounded(orighigh,shortsplicedist,chrhigh);
-#ifdef LONG_ENDSPLICES
- mappingend = segmentend = add_bounded(orighigh,shortsplicedist,chrhigh);
-#else
- mappingend = segmentend = add_bounded(orighigh,shortsplicedist_novelend,chrhigh);
-#endif
- } else {
- knownsplice_limit_high = mappingend = segmentend = orighigh;
- }
- debug13(printf("Original bounds B: knownsplice_limit_high %u, mappingend %u\n",
- knownsplice_limit_high - chroffset,mappingend - chroffset));
-
- debug13(printf("plus hit %u..%u (extend_left_p %d, extend_right_p %d) (sensedir %d) => segment bounds %u..%u\n",
- Stage3end_genomicstart(hit) - chroffset,Stage3end_genomicend(hit) - chroffset,
- extend_left_p,extend_right_p,Stage3end_sensedir(hit),segmentstart-chroffset,segmentend-chroffset));
-
- close_mappingstart_last = middle_mappingstart_last = origlow;
- close_mappingend_last = middle_mappingend_last = orighigh;
- close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
- close_mappingend_greedy = middle_mappingend_greedy = segmentend;
-
- if (plus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u (plus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,plus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,plus_nsegments-1,plus_segments,segmentend);
- while (i >= 0 && plus_segments[i].diagonal >= segmentend) {
- i--;
- }
- starti = i;
- while (i >= 0 && plus_segments[i].diagonal > segmentstart) {
- if (plus_segments[i].diagonal < (Univcoord_T) -1) {
- endi = i;
- }
- i--;
- }
- if (extend_left_p == true && starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti >= endi);
- for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
- plus_segments[i].querypos5,plus_segments[i].querypos3));
- if (plus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 3. Missing start of query, so there could be a middle splice */
- debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
- mappingpos < origlow) {
- middle_mappingstart_greedy = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
- }
-#ifdef LONG_ENDSPLICES
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
-#else
- if (mappingpos < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
-#endif
- } else {
- debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
- plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
- mappingpos < origlow) {
- close_mappingstart_greedy = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_greedy - chroffset));
- }
- if (mappingpos < close_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- close_mappingstart_last = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
- }
- }
- }
+ novelp = (anchor_segment->usedp == true) ? false : true;
+ pairablep = anchor_segment->pairablep;
+ anchor_segment->usedp = true;
- if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
- } else if (middle_mappingstart_p == true) {
- debug13(printf("Using middle mappingstart\n"));
- close_knownsplice_limit_low = middle_mappingstart_greedy;
- close_mappingstart_greedy = middle_mappingstart_greedy;
- close_mappingstart_p = true;
- }
- if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
- knownsplice_limit_low = middle_mappingstart_last;
- mappingstart = middle_mappingstart_last;
- } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
- knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
- mappingstart = close_mappingstart_last;
- }
- if (close_mappingstart_p == false) {
- fallback_mappingstart_p = false;
- } else if (mappingstart >= close_mappingstart_greedy) {
- fallback_mappingstart_p = false;
- } else {
- debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
- fallback_mappingstart_p = true;
- }
+ startk = anchork - 1;
+ while (startk >= 0 && plus_segments[startk].diagonal != (Univcoord_T) -1 &&
+ plus_segments[startk].diagonal + shortsplicedist > anchor_segment->diagonal) {
+ if (plus_segments[startk].usedp == false) {
+ novelp = true;
}
+ plus_segments[startk].usedp = true;
+ if (plus_segments[startk].pairablep == true) {
+ pairablep = true;
+ }
+ startk--;
+ }
- if (extend_right_p == true && starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti >= endi);
- for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
- plus_segments[i].querypos5,plus_segments[i].querypos3));
- if (query_lastpos - plus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 1. Missing end of query, so there could be a middle splice */
- debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
- mappingpos > orighigh) {
- middle_mappingend_greedy = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
- }
-#ifdef LONG_ENDSPLICES
- if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
- }
-#else
- if (mappingpos > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
- }
-#endif
-
- } else {
- debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
- query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = plus_segments[i].diagonal) < close_mappingend_greedy &&
- mappingpos > orighigh) {
- close_mappingend_greedy = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
- }
- if (mappingpos > close_mappingend_last) {
- /* Use > for NOT_GREEDY */
- close_mappingend_last = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
- }
- }
- }
-
- if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
- } else if (middle_mappingend_p == true) {
- close_knownsplice_limit_high = middle_mappingend_greedy;
- close_mappingend_greedy = middle_mappingend_greedy;
- close_mappingend_p = true;
- debug13(printf("Using middle mappingend => close_mappingend %u\n",close_mappingend_greedy));
- }
- if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
- knownsplice_limit_high = middle_mappingend_last;
- mappingend = middle_mappingend_last;
- } else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
- knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
- mappingend = close_mappingend_last;
- }
- if (close_mappingend_p == false) {
- fallback_mappingend_p = false;
- } else if (mappingend <= close_mappingend_greedy) {
- fallback_mappingend_p = false;
- } else {
- debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
- fallback_mappingend_p = true;
- }
+ endk = anchork + 1;
+ while (endk < plus_nsegments && plus_segments[endk].diagonal < anchor_segment->diagonal + shortsplicedist) {
+ if (plus_segments[endk].usedp == false) {
+ novelp = true;
}
+ plus_segments[endk].usedp = true;
+ if (plus_segments[endk].pairablep == true) {
+ pairablep = true;
+ }
+ endk++;
}
- favor_right_p = false;
+ if (novelp == true && (pairablep == true || require_pairing_p == false)) {
+ debug13(printf("Processing segments %d to %d inclusive\n",startk+1,endk-1));
+ chrnum = anchor_segment->chrnum;
+ chroffset = anchor_segment->chroffset;
+ chrhigh = anchor_segment->chrhigh;
+ chrlength = anchor_segment->chrlength;
- } else {
- origlow = Stage3end_genomicend(hit);
- orighigh = Stage3end_genomicstart(hit);
-
- if (extend_right_p == true) {
+ left = anchor_segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
+ origlow = left - anchor_segment->querypos5;
+ orighigh = left + (querylength - anchor_segment->querypos3);
+
+ /* extend left */
knownsplice_limit_low = subtract_bounded(origlow,shortsplicedist,chroffset);
-#ifdef LONG_ENDSPLICES
mappingstart = segmentstart = subtract_bounded(origlow,shortsplicedist,chroffset);
-#else
- mappingstart = segmentstart = subtract_bounded(origlow,shortsplicedist_novelend,chroffset);
-#endif
- } else {
- knownsplice_limit_low = mappingstart = segmentstart = origlow;
- }
- debug13(printf("Original bounds C: knownsplice_limit_low %u, mappingstart %u\n",
- knownsplice_limit_low - chroffset,mappingstart - chroffset));
+ debug13(printf("Original bounds A: knownsplice_limit_low %u, mappingstart %u\n",
+ knownsplice_limit_low - chroffset,mappingstart - chroffset));
- if (extend_left_p == true) {
+ /* extend right */
knownsplice_limit_high = add_bounded(orighigh,shortsplicedist,chrhigh);
-#ifdef LONG_ENDSPLICES
- mappingend = segmentend = add_bounded(orighigh,shortsplicedist,chrhigh);
-#else
- mappingend = segmentend = add_bounded(orighigh,shortsplicedist_novelend,chrhigh);
-#endif
- } else {
- knownsplice_limit_high = mappingend = segmentend = orighigh;
- }
- debug13(printf("Original bounds D: knownsplice_limit_high %u, mappingend %u\n",
- knownsplice_limit_high - chroffset,mappingend - chroffset));
-
- debug13(printf("minus hit %u..%u (extend_left_p %d, extend_right_p %d) (sensedir %d), => segmentbounds %u..%u\n",
- Stage3end_genomicstart(hit) - chroffset,Stage3end_genomicend(hit) - chroffset,
- extend_left_p,extend_right_p,Stage3end_sensedir(hit),segmentstart - chroffset,segmentend - chroffset));
-
- close_mappingstart_last = middle_mappingstart_last = origlow;
- close_mappingend_last = middle_mappingend_last = orighigh;
- close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
- close_mappingend_greedy = middle_mappingend_greedy = segmentend;
-
- if (minus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u, (minus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,minus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,minus_nsegments-1,minus_segments,segmentend);
- while (i >= 0 && minus_segments[i].diagonal >= segmentend) {
- i--;
- }
- starti = i;
- while (i >= 0 && minus_segments[i].diagonal > segmentstart) {
- if (minus_segments[i].diagonal < (Univcoord_T) -1) {
- endi = i;
- }
- i--;
- }
- if (extend_right_p == true && starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti >= endi);
- for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
- minus_segments[i].querypos5,minus_segments[i].querypos3));
- if (query_lastpos - minus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 2. Missing end of query, so there could be a middle splice */
- debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
- mappingpos < origlow) {
- middle_mappingstart_greedy = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
- }
-#ifdef LONG_ENDSPLICES
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
-#else
- if (mappingpos < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
-#endif
-
- } else {
- debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
- query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
- mappingpos < origlow) {
- close_mappingstart_greedy = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart greedy to %u\n",close_mappingstart_greedy - chroffset));
- }
- if (mappingpos < close_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- close_mappingstart_last = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
- }
+ mappingend = segmentend = add_bounded(orighigh,shortsplicedist,chrhigh);
+ debug13(printf("Original bounds B: knownsplice_limit_high %u, mappingend %u\n",
+ knownsplice_limit_high - chroffset,mappingend - chroffset));
+
+ close_mappingstart_last = middle_mappingstart_last = origlow;
+ close_mappingend_last = middle_mappingend_last = orighigh;
+ close_mappingstart_p = close_mappingend_p = false;
+ middle_mappingstart_p = middle_mappingend_p = false;
+
+ /* 1 */
+ for (k = startk + 1; k < endk; k++) {
+ debug13(printf("1. plus diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (plus_segments[k].diagonal - chroffset),(unsigned long long) plus_segments[k].diagonal,
+ plus_segments[k].querypos5,plus_segments[k].querypos3,plus_segments[k].usedp,plus_segments[k].pairablep));
+ if (plus_segments[k].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 3. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ plus_segments[k].querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(plus_segments[k].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
}
- }
- if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
- } else if (middle_mappingstart_p == true) {
- debug13(printf("Using middle mappingstart\n"));
- close_knownsplice_limit_low = middle_mappingstart_greedy;
- close_mappingstart_greedy = middle_mappingstart_greedy;
- close_mappingstart_p = true;
- }
- if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
- knownsplice_limit_low = middle_mappingstart_last;
- mappingstart = middle_mappingstart_last;
- } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
- knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
- mappingstart = close_mappingstart_last;
- }
- if (close_mappingstart_p == false) {
- fallback_mappingstart_p = false;
- } else if (mappingstart >= close_mappingstart_greedy) {
- fallback_mappingstart_p = false;
} else {
- debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
- fallback_mappingstart_p = true;
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ plus_segments[k].querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(plus_segments[k].diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
+ }
}
- }
- if (extend_left_p == true && starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti >= endi);
- for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
- minus_segments[i].querypos5,minus_segments[i].querypos3));
- if (minus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 4. Missing start of query, so there could be a middle splice */
- debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
- mappingpos > orighigh) {
- middle_mappingend_greedy = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
- }
-#ifdef LONG_ENDSPLICES
- if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
- }
-#else
- if (mappingpos > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
- }
-#endif
- } else {
- debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
- minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = minus_segments[i].diagonal) < close_mappingend_greedy &&
- mappingpos > orighigh) {
- close_mappingend_greedy = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
- }
- if (mappingpos > close_mappingend_last) {
- /* Use > for NOT_GREEDY */
- close_mappingend_last = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
- }
+ if (query_lastpos - plus_segments[k].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 1. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,plus_segments[k].querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = add_bounded(plus_segments[k].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
}
- }
- if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
- } else if (middle_mappingend_p == true) {
- debug13(printf("Using middle mappingend\n"));
- close_knownsplice_limit_high = middle_mappingend_greedy;
- close_mappingend_greedy = middle_mappingend_greedy;
- close_mappingend_p = true;
- }
- if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
- knownsplice_limit_high = middle_mappingend_last;
- mappingend = middle_mappingend_last;
- } else if (close_mappingend_p == true && close_mappingstart_last != close_mappingstart_greedy) {
- knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
- mappingend = close_mappingend_last;
- }
- if (close_mappingend_p == false) {
- fallback_mappingend_p = false;
- } else if (mappingend <= close_mappingend_greedy) {
- fallback_mappingend_p = false;
} else {
- debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
- fallback_mappingend_p = true;
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,plus_segments[k].querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = plus_segments[k].diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
}
}
- }
-
- favor_right_p = true;
- }
- if (close_mappingstart_p == true && close_mappingend_p == true) {
- debug13(printf("Single hit: Running gmap with close mappingstart and close mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,hits,queryuc_ptr,querylength,
- /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
- query_compress_fwd,query_compress_rev,
- close_mappingstart_greedy,close_mappingend_greedy,
- close_knownsplice_limit_low,close_knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ /* 2 */
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_last;
+ close_mappingstart_last = middle_mappingstart_last;
+ close_mappingstart_p = true;
+ }
- if (good_start_p == true && good_end_p == true) {
- /* Success */
- } else if (gmap_rerun_p == false) {
- debug13(printf("Skipping re-run of gmap\n"));
- } else if (good_start_p == true) {
- if (fallback_mappingend_p == true) {
- debug13(printf("Single hit: Re-running gmap with close mappingstart only\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,hits,queryuc_ptr,querylength,
- /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
- query_compress_fwd,query_compress_rev,close_mappingstart_greedy,mappingend,
- close_knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
}
- } else if (good_end_p == true) {
- if (fallback_mappingstart_p == true) {
- debug13(printf("Single hit: Re-running gmap with close mappingend only\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,hits,queryuc_ptr,querylength,
- /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
- query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_greedy,
- knownsplice_limit_low,close_knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ if (close_mappingstart_p == false) {
+ fallback_mappingstart_p = false;
+ } else {
+ debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
+ fallback_mappingstart_p = true;
}
- } else {
- if (fallback_mappingstart_p == true && fallback_mappingend_p == true) {
- debug13(printf("Single hit: Re-running gmap with far mappingstart and mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,hits,queryuc_ptr,querylength,
- /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
- query_compress_fwd,query_compress_rev,mappingstart,mappingend,
- knownsplice_limit_low,close_knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+
+ /* 3 */
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ close_knownsplice_limit_high = middle_mappingend_last;
+ close_mappingend_last = middle_mappingend_last;
+ close_mappingend_p = true;
+ debug13(printf("Using middle mappingend => close_mappingend %u\n",close_mappingend_last));
}
- }
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ }
+ if (close_mappingend_p == false) {
+ fallback_mappingend_p = false;
+ } else {
+ debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
+ fallback_mappingend_p = true;
+ }
+
+ /* 4 */
+ if (close_mappingstart_p == true && close_mappingend_p == true) {
+ debug13(printf("Single hit: Running gmap with close mappingstart and close mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,
+ close_mappingstart_last,close_mappingend_last,
+ close_knownsplice_limit_low,close_knownsplice_limit_high,
+ /*plusp*/true,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+
+ if (good_start_p == true && good_end_p == true) {
+ /* Success */
+ } else if (gmap_rerun_p == false) {
+ debug13(printf("Skipping re-run of gmap\n"));
+ } else if (good_start_p == true) {
+ if (fallback_mappingend_p == true) {
+ debug13(printf("Single hit: Re-running gmap with close mappingstart only\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
+ close_knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/true,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+ } else if (good_end_p == true) {
+ if (fallback_mappingstart_p == true) {
+ debug13(printf("Single hit: Re-running gmap with close mappingend only\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
+ knownsplice_limit_low,close_knownsplice_limit_high,
+ /*plusp*/true,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+ } else {
+ if (fallback_mappingstart_p == true && fallback_mappingend_p == true) {
+ debug13(printf("Single hit: Re-running gmap with far mappingstart and mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,close_knownsplice_limit_high,
+ /*plusp*/true,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+ }
- } else if (close_mappingstart_p == true) {
- debug13(printf("Single hit: Running gmap with close mappingstart\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,hits,queryuc_ptr,querylength,
- /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
- query_compress_fwd,query_compress_rev,close_mappingstart_greedy,mappingend,
- close_knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
- if (good_start_p == true) {
- /* Success */
- } else if (gmap_rerun_p == false) {
- debug13(printf("Skipping re-run of gmap\n"));
- } else if (fallback_mappingstart_p == true) {
- debug13(printf("Single hit: Re-running gmap with far mappingstart\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,hits,queryuc_ptr,querylength,
- /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
- query_compress_fwd,query_compress_rev,mappingstart,mappingend,
- knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
- }
+ } else if (close_mappingstart_p == true) {
+ debug13(printf("Single hit: Running gmap with close mappingstart\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
+ close_knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/true,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ if (good_start_p == true) {
+ /* Success */
+ } else if (gmap_rerun_p == false) {
+ debug13(printf("Skipping re-run of gmap\n"));
+ } else if (fallback_mappingstart_p == true) {
+ debug13(printf("Single hit: Re-running gmap with far mappingstart\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/true,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
- } else if (close_mappingend_p == true) {
- debug13(printf("Single hit: Running gmap with close mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,hits,queryuc_ptr,querylength,
- /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
- query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_greedy,
- knownsplice_limit_low,close_knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
- if (good_end_p == true) {
- /* Success */
- } else if (gmap_rerun_p == false) {
- debug13(printf("Skipping re-run of gmap\n"));
- } else if (fallback_mappingend_p == true) {
- debug13(printf("Single hit: Re-running gmap with far mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,hits,queryuc_ptr,querylength,
- /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
- query_compress_fwd,query_compress_rev,mappingstart,mappingend,
- knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ } else if (close_mappingend_p == true) {
+ debug13(printf("Single hit: Running gmap with close mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
+ knownsplice_limit_low,close_knownsplice_limit_high,
+ /*plusp*/true,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ if (good_end_p == true) {
+ /* Success */
+ } else if (gmap_rerun_p == false) {
+ debug13(printf("Skipping re-run of gmap\n"));
+ } else if (fallback_mappingend_p == true) {
+ debug13(printf("Single hit: Re-running gmap with far mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/true,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+
+ } else {
+ debug13(printf("Single hit: Running gmap with far mappingstart and mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/true,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
}
-
- } else {
- debug13(printf("Single hit: Running gmap with far mappingstart and mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,hits,queryuc_ptr,querylength,
- /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
- query_compress_fwd,query_compress_rev,mappingstart,mappingend,
- knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
}
return hits;
}
+#endif
-
+#if 0
static List_T
-align_singleend_with_gmap (History_T gmap_history, List_T result, T this,
- Compress_T query_compress_fwd, Compress_T query_compress_rev,
- char *queryuc_ptr, int querylength, int query_lastpos,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int user_maxlevel, int cutoff_level, bool first_read_p) {
- List_T new_result = NULL, gmap_hits = NULL;
- Stage3end_T hit, gmap;
- List_T p, a;
- int genestrand;
- int missing_hit, missing_gmap;
- int i;
- bool gmap_better_p;
-
-
- debug13(printf("Sorting hits by nmatches\n"));
- result = Stage3end_sort_bymatches(result);
-
- for (p = result, i = 0; p != NULL && i < max_gmap_improvement; p = p->rest, i++) {
- hit = (Stage3end_T) List_head(p);
- genestrand = Stage3end_genestrand(hit);
+convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
+ char *accession, char *queryuc_ptr, int querylength, int query_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ char *queryrc, bool invertedp,
+#endif
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ List_T anchor_segments, struct Segment_T *minus_segments, int minus_nsegments,
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ int user_maxlevel, int genestrand, bool first_read_p,
+ bool require_pairing_p) {
+ Univcoord_T segmentstart, segmentend, left;
+ Univcoord_T mappingstart, mappingend, chroffset, chrhigh, mappingpos;
+ Univcoord_T origlow, orighigh;
+ Univcoord_T close_mappingstart_last, close_mappingend_last,
+ middle_mappingstart_last, middle_mappingend_last;
+ Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+ Univcoord_T close_knownsplice_limit_low, close_knownsplice_limit_high;
+ Chrpos_T chrlength;
+ Chrnum_T chrnum;
+ bool close_mappingstart_p, close_mappingend_p;
+ bool middle_mappingstart_p, middle_mappingend_p;
+ bool fallback_mappingstart_p, fallback_mappingend_p;
+ bool good_start_p, good_end_p, favor_right_p = true;
+ bool novelp; /* Want any of the segments in starti..(endi-1) to not be used */
+ bool pairablep; /* Want any of the segments in starti..(endi-1) to be pairable */
- debug13(printf("GMAP improvement: Entering align_singleend_with_gmap with hittype %s\n",
- Stage3end_hittype_string(hit)));
+ List_T p;
+ Segment_T anchor_segment;
+ int anchork, startk, endk, k;
- /* Was querylength5 - Stage3end_matches(hit5) > 5 */
- if (Stage3end_hittype(hit) == GMAP) {
- /* Skip */
- debug13(printf("Skipping hit of type GMAP\n"));
- new_result = List_push(new_result,(void *) hit);
- } else if (Stage3end_improved_by_gmap_p(hit) == true) {
- /* Skip */
- debug13(printf("Skipping hit already improved by GMAP\n"));
- new_result = List_push(new_result,(void *) hit);
+ anchork = 0;
+ for (p = anchor_segments; p != NULL; p = List_next(p)) {
+ anchor_segment = (Segment_T) List_head(p);
+ assert(anchor_segment->diagonal != (Univcoord_T) -1);
+ while (minus_segments[anchork].diagonal != anchor_segment->diagonal) {
+ anchork++;
+ }
-#if 0
- /* Don't skip on final align_singleend_with_gmap */
- } else if (Stage3end_hittype(hit) == TERMINAL) {
- /* Skip */
- debug13(printf("Skipping hit of type TERMINAL\n"));
- new_result = List_push(new_result,(void *) hit);
-#endif
+ novelp = (anchor_segment->usedp == true) ? false : true;
+ pairablep = anchor_segment->pairablep;
+ anchor_segment->usedp = true;
- } else if (querylength - Stage3end_nmatches_posttrim(hit) <= user_maxlevel) {
- /* Skip */
- debug13(printf("Skipping hit with nmismatches %d - %d <= user_maxlevel %d\n",
- querylength,Stage3end_nmatches_posttrim(hit),user_maxlevel));
- new_result = List_push(new_result,(void *) hit);
+ startk = anchork - 1;
+ while (startk >= 0 && minus_segments[startk].diagonal != (Univcoord_T) -1 &&
+ minus_segments[startk].diagonal + shortsplicedist > anchor_segment->diagonal) {
+ if (minus_segments[startk].usedp == false) {
+ novelp = true;
+ }
+ minus_segments[startk].usedp = true;
+ if (minus_segments[startk].pairablep == true) {
+ pairablep = true;
+ }
+ startk--;
+ }
- } else if (Stage3end_terminal_trim(hit) <= GMAP_TERMINAL_TRIM
- && Stage3end_contains_known_splicesite(hit) == false
- ) {
- debug13(printf("Skipping good hit\n"));
- new_result = List_push(new_result,(void *) hit);
+ endk = anchork + 1;
+ while (endk < minus_nsegments && minus_segments[endk].diagonal < anchor_segment->diagonal + shortsplicedist) {
+ if (minus_segments[endk].usedp == false) {
+ novelp = true;
+ }
+ minus_segments[endk].usedp = true;
+ if (minus_segments[endk].pairablep == true) {
+ pairablep = true;
+ }
+ endk++;
+ }
+
+ if (novelp == true && (pairablep == true || require_pairing_p == false)) {
+ debug13(printf("Processing segments %d to %d inclusive\n",startk+1,endk-1));
+ chrnum = anchor_segment->chrnum;
+ chroffset = anchor_segment->chroffset;
+ chrhigh = anchor_segment->chrhigh;
+ chrlength = anchor_segment->chrlength;
+
+ left = anchor_segment->diagonal - querylength; /* FORMULA */
+ origlow = left - (querylength - anchor_segment->querypos3);
+ orighigh = left + anchor_segment->querypos5;
+
+ /* extend right */
+ knownsplice_limit_low = subtract_bounded(origlow,shortsplicedist,chroffset);
+ mappingstart = segmentstart = subtract_bounded(origlow,shortsplicedist,chroffset);
+ debug13(printf("Original bounds C: knownsplice_limit_low %u, mappingstart %u\n",
+ knownsplice_limit_low - chroffset,mappingstart - chroffset));
+
+ /* extend left */
+ knownsplice_limit_high = add_bounded(orighigh,shortsplicedist,chrhigh);
+ mappingend = segmentend = add_bounded(orighigh,shortsplicedist,chrhigh);
+ debug13(printf("Original bounds D: knownsplice_limit_high %u, mappingend %u\n",
+ knownsplice_limit_high - chroffset,mappingend - chroffset));
+
+ close_mappingstart_last = middle_mappingstart_last = origlow;
+ close_mappingend_last = middle_mappingend_last = orighigh;
+ close_mappingstart_p = close_mappingend_p = false;
+ middle_mappingstart_p = middle_mappingend_p = false;
+
+ /* 1 */
+ for (k = startk + 1; k < endk; k++) {
+ debug13(printf("1. minus diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (minus_segments[k].diagonal - chroffset),(unsigned long long) minus_segments[k].diagonal,
+ minus_segments[k].querypos5,minus_segments[k].querypos3,minus_segments[k].usedp,minus_segments[k].pairablep));
+ if (query_lastpos - minus_segments[k].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 2. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,minus_segments[k].querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(minus_segments[k].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+
+ } else {
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,minus_segments[k].querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(minus_segments[k].diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
+ }
+ }
+
- } else {
- debug13(printf("To correct hit terminalp %d or known_splicesite %d, running GMAP on 5' to match with 3' end\n",
- Stage3end_hittype(hit) == TERMINAL,
- Stage3end_contains_known_splicesite(hit)));
-
- /* Want high quality because we already have a pretty good answer */
- gmap_hits = align_single_hit_with_gmap(gmap_history,hit,
- /*extend_left_p*/true,/*extend_right_p*/true,
- queryuc_ptr,querylength,query_lastpos,
-#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc,Shortread_invertedp(queryseq),
-#endif
- query_compress_fwd,query_compress_rev,
- this->plus_segments,this->plus_nsegments,
- this->minus_segments,this->minus_nsegments,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,genestrand,first_read_p);
-
- gmap_better_p = false;
- missing_hit = querylength - Stage3end_nmatches_posttrim(hit);
- for (a = gmap_hits; a != NULL; a = List_next(a)) {
- gmap = (Stage3end_T) List_head(a);
- missing_gmap = querylength - Stage3end_nmatches_posttrim(gmap);
- if (Stage3end_score(gmap) > cutoff_level + gmap_allowance) {
- debug13(printf("Score is only %d vs cutoff level %d\n",Stage3end_score(gmap),cutoff_level));
- Stage3end_free(&gmap);
- } else if (missing_gmap < missing_hit/2) {
- debug13(printf("GMAP with %d matches, %d missing is significantly better than hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
- gmap_better_p = true;
- new_result = List_push(new_result,(void *) gmap);
- Stage3end_set_improved_by_gmap(hit);
+ if (minus_segments[k].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 4. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ minus_segments[k].querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = add_bounded(minus_segments[k].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
+ }
+
} else {
- debug13(printf("GMAP with %d matches, %d missing is not significantly better than hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
- Stage3end_free(&gmap);
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ minus_segments[k].querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = minus_segments[k].diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
}
}
- List_free(&gmap_hits);
-
- if (gmap_better_p == false) {
- new_result = List_push(new_result,(void *) hit);
+
+ /* 2 */
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_last;
+ close_mappingstart_last = middle_mappingstart_last;
+ close_mappingstart_p = true;
+ }
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
+ }
+ if (close_mappingstart_p == false) {
+ fallback_mappingstart_p = false;
} else {
- Stage3end_free(&hit);
+ debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
+ fallback_mappingstart_p = true;
+ }
+
+ /* 3 */
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_last;
+ close_mappingend_last = middle_mappingend_last;
+ close_mappingend_p = true;
+ }
+
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ }
+ if (close_mappingend_p == false) {
+ fallback_mappingend_p = false;
+ } else {
+ debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
+ fallback_mappingend_p = true;
+ }
+
+ /* 4 */
+ if (close_mappingstart_p == true && close_mappingend_p == true) {
+ debug13(printf("Single hit: Running gmap with close mappingstart and close mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,
+ close_mappingstart_last,close_mappingend_last,
+ close_knownsplice_limit_low,close_knownsplice_limit_high,
+ /*plusp*/false,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+
+ if (good_start_p == true && good_end_p == true) {
+ /* Success */
+ } else if (gmap_rerun_p == false) {
+ debug13(printf("Skipping re-run of gmap\n"));
+ } else if (good_start_p == true) {
+ if (fallback_mappingend_p == true) {
+ debug13(printf("Single hit: Re-running gmap with close mappingstart only\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
+ close_knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/false,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+ } else if (good_end_p == true) {
+ if (fallback_mappingstart_p == true) {
+ debug13(printf("Single hit: Re-running gmap with close mappingend only\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
+ knownsplice_limit_low,close_knownsplice_limit_high,
+ /*plusp*/false,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+ } else {
+ if (fallback_mappingstart_p == true && fallback_mappingend_p == true) {
+ debug13(printf("Single hit: Re-running gmap with far mappingstart and mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,close_knownsplice_limit_high,
+ /*plusp*/false,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+ }
+
+ } else if (close_mappingstart_p == true) {
+ debug13(printf("Single hit: Running gmap with close mappingstart\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
+ close_knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/false,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ if (good_start_p == true) {
+ /* Success */
+ } else if (gmap_rerun_p == false) {
+ debug13(printf("Skipping re-run of gmap\n"));
+ } else if (fallback_mappingstart_p == true) {
+ debug13(printf("Single hit: Re-running gmap with far mappingstart\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/false,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+
+ } else if (close_mappingend_p == true) {
+ debug13(printf("Single hit: Running gmap with close mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
+ knownsplice_limit_low,close_knownsplice_limit_high,
+ /*plusp*/false,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ if (good_end_p == true) {
+ /* Success */
+ } else if (gmap_rerun_p == false) {
+ debug13(printf("Skipping re-run of gmap\n"));
+ } else if (fallback_mappingend_p == true) {
+ debug13(printf("Single hit: Re-running gmap with far mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/false,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+
+ } else {
+ debug13(printf("Single hit: Running gmap with far mappingstart and mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+ /*sense_try*/0,favor_right_p,/*paired_favor_mode*/0,/*zero_offset*/0,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ /*plusp*/false,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
}
}
}
-
- for ( ; p != NULL; p = p->rest) {
- hit = (Stage3end_T) List_head(p);
- new_result = List_push(new_result,(void *) hit);
- }
-
- List_free(&result);
- return new_result;
+
+ return hits;
}
-
-
-/* done_level should probably be renamed final_level. opt_level
- should probably be renamed found_level or opt_level. */
+#endif
+
+
static List_T
-align_end (int *cutoff_level, History_T gmap_history, T this,
- Compress_T query_compress_fwd, Compress_T query_compress_rev,
- char *queryuc_ptr, char *queryrc, int querylength, int query_lastpos,
- Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold, Floors_T *floors_array,
-
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+convert_plus_segments_to_gmap (History_T gmap_history, List_T hits,
+ char *accession, char *queryuc_ptr, int querylength, int query_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ char *queryrc, bool invertedp,
+#endif
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ List_T anchor_segments, struct Segment_T *plus_segments, int plus_nsegments,
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ int user_maxlevel, int genestrand, bool first_read_p,
+ bool require_pairing_p) {
+ Univcoord_T chroffset, chrhigh, mappingpos;
+ Univcoord_T origlow, orighigh;
+ Univcoord_T close_mappingstart_last, close_mappingend_last,
+ middle_mappingstart_last, middle_mappingend_last;
+ Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+ Univcoord_T close_knownsplice_limit_low, close_knownsplice_limit_high;
+ Chrpos_T chrlength;
+ Chrnum_T chrnum;
+ bool close_mappingstart_p = false, close_mappingend_p = false;
+ bool middle_mappingstart_p = false, middle_mappingend_p = false;
+ bool novelp; /* Want any of the segments in startk..(endk-1) to not be used */
+ bool pairablep; /* Want any of the segments in startk..(endk-1) to be pairable */
- int user_maxlevel, int indel_penalty_middle, int indel_penalty_end,
- int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
- bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
- bool allvalidp, bool keep_floors_p, int genestrand, bool first_read_p) {
- List_T hits, subs = NULL, indels = NULL, new_indels,
- ambiguous = NULL, singlesplicing = NULL, doublesplicing = NULL, shortendsplicing = NULL,
- longsinglesplicing = NULL, distantsplicing = NULL, good_gmap_hits = NULL, terminals = NULL;
- List_T gmap_hits, p, a;
- Stage3end_T hit, gmap;
- int nmisses_allowed_sarray;
- int found_score, done_level, opt_level, fast_level, mismatch_level, nmismatches, max_mismatches_allowed;
- int max_splice_mismatches, i;
- int missing_hit, missing_gmap;
- int nhits = 0, nsplicepairs = 0;
- List_T *donors_plus, *antidonors_plus, *acceptors_plus, *antiacceptors_plus,
- *donors_minus, *antidonors_minus, *acceptors_minus, *antiacceptors_minus;
- bool any_omitted_p, ambiguousp, alloc_floors_p = false, floors_computed_p = false;
- Floors_T floors;
- bool segments_computed_p = false, gmap_better_p, extend_left_p, extend_right_p;
- Indexdb_T plus_indexdb, minus_indexdb;
+ List_T p;
+ Segment_T anchor_segment, segment;
+ int anchork, startk, endk, n, i, j, firstj, lastj, k, best_starti, best_endi;
- if (genestrand == +2) {
- plus_indexdb = indexdb_rev;
- minus_indexdb = indexdb_fwd;
- } else {
- plus_indexdb = indexdb_fwd;
- minus_indexdb = indexdb_rev;
- }
+ Stage3end_T hit;
+ Pair_T *array;
+ struct Pair_T *pairarray;
+ List_T pairs, stage2pairs, unsorted_pairs;
+ int querypos, boundpos, seglength;
+ Chrpos_T genomepos;
+ char comp, c, g, g_alt;
+ char *gsequence_orig, *gsequence_alt;
- found_score = querylength;
- fast_level = (querylength + index1interval - 1)/spansize - NREQUIRED_FAST;
- debug(printf("fast_level %d = (querylength %d + index1interval %d - 1)/spansize %d - nrequired_fast %d\n",
- fast_level,querylength,index1interval,spansize,NREQUIRED_FAST));
+ Segment_T *sorted, *sorted_allocated;
+ int *scores, *scores_allocated, best_score, score;
+ int *prev_left, *prev_right, *prev_allocated, besti;
-#if 0
- /* This prevents complete_mm procedure, needed for short reads */
- if (fast_level < 1 && user_maxlevel < 0) {
- debug(printf("Changing fast_level to 0\n"));
- fast_level = 1; /* Do at least 1 mismatch */
- }
-#endif
+ int sensedir;
+ int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
+ max_match_length, ambig_end_length_5, ambig_end_length_3,
+ unknowns, mismatches, qopens, qindels, topens, tindels,
+ ncanonical, nsemicanonical, nnoncanonical;
+ double ambig_prob_5, ambig_prob_3, min_splice_prob;
+ Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+ Univcoord_T start, end, left;
+ int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
- if (user_maxlevel >= 0) {
- *cutoff_level = user_maxlevel;
- } else if (fast_level >= 0) {
- *cutoff_level = fast_level;
- } else {
- *cutoff_level = 0;
- }
- debug(printf("cutoff_level = %d\n",*cutoff_level));
- if (user_maxlevel < 0) {
- if (fast_level >= 0) {
- user_maxlevel = fast_level;
+ if (plus_nsegments > 0) {
+#ifdef HAVE_ALLOCA
+ if (plus_nsegments < MAX_ALLOCATION) {
+ prev_allocated = (int *) ALLOCA(plus_nsegments*sizeof(int));
+ scores_allocated = (int *) ALLOCA(plus_nsegments*sizeof(int));
+ sorted_allocated = (Segment_T *) ALLOCA(plus_nsegments*sizeof(Segment_T));
} else {
- user_maxlevel = 0;
+ prev_allocated = (int *) MALLOC(plus_nsegments*sizeof(int));
+ scores_allocated = (int *) MALLOC(plus_nsegments*sizeof(int));
+ sorted_allocated = (Segment_T *) MALLOC(plus_nsegments*sizeof(Segment_T));
}
- }
- debug(printf("user_maxlevel = %d\n",user_maxlevel));
-
-#if 0
- if (dibasep) {
- opt_level = querylength; /* Allow extra because color errors may exceed nt errors */
- }
+ gsequence_orig = (char *) MALLOCA((querylength+1) * sizeof(char));
+ gsequence_alt = (char *) MALLOCA((querylength+1) * sizeof(char));
+#else
+ prev_allocated = (int *) MALLOC(plus_nsegments*sizeof(int));
+ scores_allocated = (int *) MALLOC(plus_nsegments*sizeof(int));
+ sorted_allocated = (Segment_T *) MALLOC(plus_nsegments*sizeof(Segment_T));
+ gsequence_orig = (char *) MALLOC((querylength+1) * sizeof(char));
+ gsequence_alt = (char *) MALLOC((querylength+1) * sizeof(char));
#endif
- opt_level = user_maxlevel;
- done_level = user_maxlevel /* + subopt_levels. -- Initially the same */;
- debug(printf("0> opt_level %d, done_level %d\n",opt_level,done_level));
+ }
- nhits = 0;
+ anchork = 0;
+ for (p = anchor_segments; p != NULL; p = List_next(p)) {
+ anchor_segment = (Segment_T) List_head(p);
+ assert(anchor_segment->diagonal != (Univcoord_T) -1);
+ while (plus_segments[anchork].diagonal != anchor_segment->diagonal) {
+ anchork++;
+ }
- nmisses_allowed_sarray = *cutoff_level;
+ startk = anchork - 1;
+ while (startk >= 0 && plus_segments[startk].diagonal != (Univcoord_T) -1 &&
+ plus_segments[startk].diagonal + shortsplicedist > anchor_segment->diagonal) {
+ startk--;
+ }
-#ifndef LARGE_GENOMES
- if (use_only_sarray_p == true) {
- Sarray_search_greedy(&(*cutoff_level),&subs,&indels,&ambiguous,&singlesplicing,&doublesplicing,
- queryuc_ptr,queryrc,querylength,query_compress_fwd,query_compress_rev,
- nmisses_allowed_sarray,genestrand,first_read_p);
- singlesplicing = Splice_group_by_segmenti(&found_score,singlesplicing,&ambiguous,querylength,
- first_read_p,/*sarrayp*/true);
- singlesplicing = Splice_group_by_segmentj(&found_score,singlesplicing,&ambiguous,querylength,
- first_read_p,/*sarrayp*/true);
- singlesplicing = List_append(singlesplicing,ambiguous);
+ endk = anchork + 1;
+ while (endk < plus_nsegments && plus_segments[endk].diagonal < anchor_segment->diagonal + shortsplicedist) {
+ endk++;
+ }
+ debug13(printf("%s read: Found plus segments %d to %d inclusive for anchor %d\n",
+ first_read_p ? "First" : "Second",startk+1,endk-1,anchork));
- hits = List_append(subs,List_append(indels,List_append(singlesplicing,doublesplicing)));
-#if 0
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/true,/*finalp*/true);
-#endif
- hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/false,/*finalp*/true);
- hits = Stage3end_resolve_multimapping(hits);
+ /* Dynamic programming on left (low) side (querypos5) */
+ if ((n = (anchork - 1) - (startk + 1) + 1) == 0) {
+ best_starti = -1;
+ } else {
+ prev_left = &(prev_allocated[startk+1]);
+ scores = &(scores_allocated[startk+1]);
+ sorted = &(sorted_allocated[startk+1]);
+
+ for (k = startk + 1, i = 0; k < anchork; k++) {
+ sorted[i++] = &(plus_segments[k]);
+ }
+ qsort(sorted,n,sizeof(Segment_T),Segment_querypos5_ascending_cmp);
+
+ lastj = 0;
+ while (lastj < n && sorted[lastj]->querypos5 < anchor_segment->querypos5) {
+ lastj++;
+ }
+
+ for (j = 0; j < lastj; j++) {
+ best_score = 0;
+ besti = -1;
+ for (i = 0; i < j; i++) {
+ if (sorted[i]->lowpos >= sorted[j]->lowpos) {
+ /* Skip, since doesn't add nucleotides to left */
+ } else if (sorted[i]->highpos < sorted[j]->lowpos) {
+ if ((score = (sorted[i]->highpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ besti = i;
+ }
+ } else if ((score = (sorted[j]->lowpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ besti = i;
+ }
+ }
+ scores[j] = sorted[j]->highpos - sorted[j]->lowpos;
+ debug13(printf("Best prev is %d with score %d\n",besti,best_score));
+ if ((prev_left[j] = besti) >= 0) {
+ scores[j] += best_score;
+ }
+ }
+
+ /* Anchor segment */
+ best_score = 0;
+ best_starti = -1;
+ for (i = 0; i < lastj; i++) {
+ if (sorted[i]->lowpos >= anchor_segment->lowpos) {
+ /* Skip, since doesn't add nucleotides to left */
+ } else if (sorted[i]->highpos < anchor_segment->lowpos) {
+ if ((score = (sorted[i]->highpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ best_starti = i;
+ }
+ } else if ((score = (anchor_segment->lowpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ best_starti = i;
+ }
+ }
+ }
- hits = Stage3end_remove_circular_alias(hits);
- hits = Stage3end_remove_duplicates(hits); /* Aliases can cause duplicates */
-
- return hits;
- } else if (use_sarray_p == true) {
- /* Replaces spanning set */
- Sarray_search_greedy(&found_score,&subs,&indels,&ambiguous,&singlesplicing,&doublesplicing,
- queryuc_ptr,queryrc,querylength,query_compress_fwd,query_compress_rev,
- nmisses_allowed_sarray,genestrand,first_read_p);
- singlesplicing = Splice_group_by_segmenti(&found_score,singlesplicing,&ambiguous,querylength,
- first_read_p,/*sarrayp*/true);
- singlesplicing = Splice_group_by_segmentj(&found_score,singlesplicing,&ambiguous,querylength,
- first_read_p,/*sarrayp*/true);
- singlesplicing = List_append(singlesplicing,ambiguous);
+ /* Dynamic programming on right (high) side (querypos3) */
+ if ((n = (endk - 1) - (anchork + 1) + 1) == 0) {
+ best_endi = -1;
+ } else {
+ prev_right = &(prev_allocated[anchork+1]);
+ scores = &(scores_allocated[anchork+1]);
+ sorted = &(sorted_allocated[anchork+1]);
+
+ for (k = anchork + 1, i = 0; k < endk; k++) {
+ sorted[i++] = &(plus_segments[k]);
+ }
+ qsort(sorted,n,sizeof(Segment_T),Segment_querypos3_ascending_cmp);
+
+ firstj = n - 1;
+ while (firstj >= 0 && sorted[firstj]->querypos3 > anchor_segment->querypos3) {
+ firstj--;
+ }
+
+ for (j = n - 1; j > firstj; j--) {
+ best_score = 0;
+ besti = -1;
+ for (i = n - 1; i > j; i--) {
+ if (sorted[i]->highpos <= sorted[i]->highpos) {
+ /* Skip, since doesn't add nucleotides to right */
+ } else if (sorted[i]->lowpos > sorted[j]->highpos) {
+ if ((score = (sorted[i]->highpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ besti = i;
+ }
+ } else if ((score = (sorted[i]->highpos - sorted[j]->highpos)) > best_score) {
+ best_score = score;
+ besti = i;
+ }
+ }
+ scores[j] = sorted[j]->highpos - sorted[j]->lowpos;
+ debug13(printf("Best prev is %d with score %d\n",besti,best_score));
+ if ((prev_right[j] = besti) >= 0) {
+ scores[j] += best_score;
+ }
+ }
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
+ /* Anchor segment */
+ best_score = 0;
+ best_endi = -1;
+ for (i = n - 1; i > firstj; i--) {
+ if (sorted[i]->highpos <= anchor_segment->highpos) {
+ /* Skip, since doesn't add nucleotides to right */
+ } else if (sorted[i]->lowpos > anchor_segment->highpos) {
+ if ((score = (sorted[i]->highpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ best_endi = i;
+ }
+ } else if ((score = (sorted[i]->highpos - anchor_segment->highpos)) > best_score) {
+ best_score = score;
+ best_endi = i;
+ }
+ }
}
- debug(printf("SA> opt_level %d, done_level %d\n",opt_level,done_level));
- } else {
-#endif
- /* 1. Exact. Requires compress if cmet or genomealt. Creates and uses spanning set. */
- mismatch_level = 0;
- if (allvalidp == false) {
- debug(printf("Not all oligos are valid, so cannot perform spanning set\n"));
- fast_level = -1;
- } else {
- debug(printf("fast_level = %d\n",fast_level));
- debug(printf("*** Stage 1. Exact ***\n"));
- subs = find_spanning_exact_matches(&found_score,&nhits,/*hits*/NULL,this,genestrand,first_read_p,
- querylength,query_lastpos,plus_indexdb,minus_indexdb,
- query_compress_fwd,query_compress_rev);
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
- }
- mismatch_level = 1;
- debug(printf("1> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+ /* Evaluate set of segments */
+ novelp = pairablep = false;
+ if (anchor_segment->usedp == false) {
+ novelp = true;
+ }
+ if (anchor_segment->pairablep == true) {
+ pairablep = true;
}
- /* 2. One mismatch. Requires spanning set and compress. */
- if (allvalidp && querylength >= one_miss_querylength && done_level >= 1) {
- debug(printf("*** Stage 2. One miss ***\n"));
- subs = find_spanning_onemiss_matches(&found_score,&nhits,subs,this,genestrand,first_read_p,
- querylength,query_compress_fwd,query_compress_rev);
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
+ sorted = &(sorted_allocated[startk+1]);
+ for (k = best_starti; k >= 0; k = prev_left[k]) {
+ if (sorted[k]->usedp == false) {
+ novelp = true;
+ }
+ if (sorted[k]->pairablep == true) {
+ pairablep = true;
}
- mismatch_level = 2;
- debug(printf("2> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
}
- /* 3. Mismatches via spanning set. Requires spanning set and compress. */
- if (allvalidp && done_level >= 2) {
- while (mismatch_level <= fast_level && mismatch_level <= done_level) {
- debug(printf("*** Stage 3 (level %d). Spanning set mismatches ***\n",mismatch_level));
- subs = find_spanning_multimiss_matches(&found_score,&nhits,subs,this,genestrand,first_read_p,
- NREQUIRED_FAST,querylength,query_compress_fwd,query_compress_rev,
- /*nmisses_allowed*/mismatch_level);
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
- }
- mismatch_level++;
- debug(printf("3> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+ sorted = &(sorted_allocated[anchork+1]);
+ for (k = best_endi; k >= 0; k = prev_right[k]) {
+ if (sorted[k]->usedp == false) {
+ novelp = true;
+ }
+ if (sorted[k]->pairablep == true) {
+ pairablep = true;
}
}
-#ifndef LARGE_GENOMES
- }
-#endif
+ debug13(printf("%s read: Processing plus segments %d to %d inclusive: novelp %d, pairablep %d\n",
+ first_read_p ? "First" : "Second",startk+1,endk-1,novelp,pairablep));
+ if (novelp == true && (pairablep == true || require_pairing_p == false)) {
+ anchor_segment->usedp = true;
+ chrnum = anchor_segment->chrnum;
+ chroffset = anchor_segment->chroffset;
+ chrhigh = anchor_segment->chrhigh;
+ chrlength = anchor_segment->chrlength;
+
+ left = anchor_segment->diagonal - querylength; /* FORMULA: Corresponds to querypos 0 */
+ origlow = left - anchor_segment->querypos5;
+ orighigh = left + (querylength - anchor_segment->querypos3);
- /* 4, 5. Complete set mismatches and indels, omitting frequent oligos */
- debug(printf("Testing done_level %d > fast_level %d\n",done_level,fast_level));
- if (use_sarray_p == true && (subs || indels || singlesplicing || doublesplicing)) {
- /* Skip. Suffix array already found something. Also, get memory errors if run both algorithms. */
+ /* extend left */
+ knownsplice_limit_low = subtract_bounded(origlow,shortsplicedist,chroffset);
+ debug13(printf("Original bounds A: knownsplice_limit_low %u\n",knownsplice_limit_low - chroffset));
+
+ /* extend right */
+ knownsplice_limit_high = add_bounded(orighigh,shortsplicedist,chrhigh);
+ debug13(printf("Original bounds B: knownsplice_limit_high %u\n",knownsplice_limit_high - chroffset));
+
+ close_mappingstart_last = middle_mappingstart_last = origlow;
+ close_mappingend_last = middle_mappingend_last = orighigh;
+ close_mappingstart_p = close_mappingend_p = false;
+ middle_mappingstart_p = middle_mappingend_p = false;
+
+ /* 1 */
+ sorted = &(sorted_allocated[startk+1]);
+ for (k = best_starti; k >= 0; k = prev_left[k]) {
+ segment = sorted[k];
+ segment->usedp = true;
+ debug13(printf("1. plus diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
+ if (segment->querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 3. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ segment->querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(segment->diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
- } else if (done_level > fast_level || done_level >= indel_penalty_middle || done_level >= indel_penalty_end) {
-#if 1
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
- plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
- /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
- floors_computed_p = true;
- complete_set_mm_indels(&found_score,&segments_computed_p,
- &opt_level,&done_level,user_maxlevel,/*revise_levels_p*/true,
- &nhits,&subs,&indels,this,query_compress_fwd,query_compress_rev,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryuc_ptr,queryrc,
-#endif
- querylength,query_lastpos,floors,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level,genestrand,first_read_p);
-#else
- /* Using obsolete masktype */
- if (masktype == MASK_NONE) {
- debug(printf("*** Stage 4,5. Complete mm/indels with no masking with done_level %d ***\n",done_level));
- complete_set_mm_indels(&found_score,&segments_computed_p,
- &any_omitted_p,&opt_level,&done_level,user_maxlevel,/*revise_levels_p*/true,
- &nhits,&subs,&indels,this,query_compress_fwd,query_compress_rev,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryuc_ptr,queryrc,
-#endif
- querylength,query_lastpos,plus_indexdb,minus_indexdb,indexdb_size_threshold,
- floors_array,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level,/*omit_frequent_p*/false,/*omit_repetitive_p*/false,keep_floors_p,
- genestrand,first_read_p);
- } else {
- debug(printf("*** Stage 4,5. Complete mm/indels masking frequent oligos with done_level %d ***\n",done_level));
- complete_set_mm_indels(&found_score,&segments_computed_p,
- &any_omitted_p,&opt_level,&done_level,user_maxlevel,/*revise_levels_p*/true,
- &nhits,&subs,&indels,this,query_compress_fwd,query_compress_rev,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryuc_ptr,queryrc,
-#endif
- querylength,query_lastpos,plus_indexdb,minus_indexdb,indexdb_size_threshold,
- floors_array,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level,/*omit_frequent_p*/true,
- /*omit_repetitive_p*/(masktype == MASK_REPETITIVE || masktype == MASK_GREEDY_REPETITIVE) ? true : false,
- keep_floors_p,genestrand,first_read_p);
- if ((masktype == MASK_GREEDY_FREQUENT || masktype == MASK_GREEDY_REPETITIVE) && subs == NULL && indels == NULL && any_omitted_p == true) {
- FREE(this->minus_segments);
- FREE(this->plus_segments);
-
- debug(printf("*** Stage 4,5. Complete mm/indels with no masking with done_level %d ***\n",done_level));
- complete_set_mm_indels(&found_score,&segments_computed_p,
- &any_omitted_p,&opt_level,&done_level,user_maxlevel,/*revise_levels_p*/true,
- &subs,&indels,this,query_compress_fwd,query_compress_rev,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryuc_ptr,queryrc,
-#endif
- querylength,query_lastpos,plus_indexdb,minus_indexdb,indexdb_size_threshold,
- floors_array,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level,/*omit_frequent_p*/false,/*omit_repetitive_p*/false,keep_floors_p,
- genestrand,first_read_p);
- }
- }
-#endif
- }
+ } else {
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ segment->querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(segment->diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
+ }
+ }
- /* 6, 7, 8, 9. Splicing. Requires compress and all positions fetched */
- if (use_sarray_p == true && (subs || indels || singlesplicing || doublesplicing)) {
- /* Skip. Suffix array already found something. Also, get memory errors if run both algorithms. */
- } else if (knownsplicingp || novelsplicingp) {
- /* 6. Single splicing */
- debug(printf("Deciding whether to do singlesplicing: done_level %d >=? localsplicing_penalty %d\n",
- done_level,localsplicing_penalty));
- if (done_level >= localsplicing_penalty) {
- debug(printf("*** Stage 6. Single splicing masking frequent oligos with done_level %d ***\n",done_level));
- /* Always mask frequent oligos for splicing, which must be transcriptional */
- if (floors_computed_p == false) {
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
- plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
- /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
- floors_computed_p = true;
- }
+ if (query_lastpos - segment->querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 1. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,segment->querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = add_bounded(segment->diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
+ }
- if (segments_computed_p == false) {
- this->plus_segments = identify_all_segments(&this->plus_nsegments,&this->plus_spliceable,&this->plus_nspliceable,
-#ifdef LARGE_GENOMES
- this->plus_positions_high,this->plus_positions_low,
-#else
- this->plus_positions,
-#endif
- this->plus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/true);
- this->minus_segments = identify_all_segments(&this->minus_nsegments,&this->minus_spliceable,&this->minus_nspliceable,
-#ifdef LARGE_GENOMES
- this->minus_positions_high,this->minus_positions_low,
-#else
- this->minus_positions,
-#endif
- this->minus_npositions,this->omitted,querylength,query_lastpos,floors,
- /*plusp*/false);
- segments_computed_p = true;
+ } else {
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,segment->querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = segment->diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
+ }
}
- singlesplicing = complete_set_singlesplicing(&found_score,singlesplicing,floors,this,
- query_compress_fwd,query_compress_rev,
- querylength,query_lastpos,
- localsplicing_penalty,
- /*max_mismatches_allowed*/done_level - localsplicing_penalty,
- genestrand,first_read_p,
- /*subs_or_indels_p*/(subs != NULL || indels != NULL) ? true : false);
+ sorted = &(sorted_allocated[anchork+1]);
+ for (k = best_endi; k >= 0; k = prev_right[k]) {
+ segment = sorted[k];
+ segment->usedp = true;
+ debug13(printf("1. plus diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
+ if (segment->querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 3. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ segment->querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(segment->diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
-#if 0
- /* Mark ambiguous splices only for single-end reads */
- singlesplicing = Stage3end_mark_ambiguous_splices(&ambiguousp,singlesplicing);
-#endif
- singlesplicing = Stage3end_optimal_score(singlesplicing,/*cutoff_level*/opt_level,subopt_levels,
- query_compress_fwd,query_compress_rev,querylength,
- /*keep_gmap_p*/true,/*finalp*/false);
+ } else {
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ segment->querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(segment->diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
+ }
+ }
- if (singlesplicing) {
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
+
+ if (query_lastpos - segment->querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 1. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,segment->querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = add_bounded(segment->diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
+ }
+
+ } else {
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,segment->querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = segment->diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
}
}
- }
- /* 7. Double splicing */
- debug(printf("Deciding whether to do doublesplicing: done_level %d >=? localsplicing_penalty %d\n",
- done_level,localsplicing_penalty));
- if (done_level >= localsplicing_penalty) {
- debug(printf("*** Stage 7. Double splicing masking frequent oligos with done_level %d ***\n",done_level));
- if (floors_computed_p == false) {
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
- plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
- /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
- floors_computed_p = true;
- }
- doublesplicing = complete_set_doublesplicing(&found_score,doublesplicing,floors,this,
- query_compress_fwd,query_compress_rev,
- queryuc_ptr,queryrc,querylength,query_lastpos,
- localsplicing_penalty,min_shortend,
- /*max_mismatches_allowed*/done_level - localsplicing_penalty,
- /*pairedp*/false,genestrand,first_read_p,
- /*subs_or_indels_p*/(subs != NULL || indels != NULL) ? true : false);
-
-#if 0
- /* Mark ambiguous splices only for single-end reads */
- doublesplicing = Stage3end_mark_ambiguous_splices(&ambiguousp,doublesplicing);
-#endif
- doublesplicing = Stage3end_optimal_score(doublesplicing,/*cutoff_level*/opt_level,subopt_levels,
- query_compress_fwd,query_compress_rev,querylength,
- /*keep_gmap_p*/true,/*finalp*/false);
- if (doublesplicing) {
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
- }
+ /* 2 */
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_last;
+ close_mappingstart_last = middle_mappingstart_last;
+ close_mappingstart_p = true;
}
- }
- if (knownsplicingp == true && done_level >= localsplicing_penalty) {
- /* Want >= and not > to give better results. Negligible effect on speed. */
- /* 8. Shortend splicing */
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ }
- max_splice_mismatches = done_level - localsplicing_penalty;
- debug(printf("*** Stage 8. Short-end splicing, allowing %d mismatches ***\n",max_splice_mismatches));
+ /* 3 */
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ close_knownsplice_limit_high = middle_mappingend_last;
+ close_mappingend_last = middle_mappingend_last;
+ close_mappingend_p = true;
+ debug13(printf("Using middle mappingend => close_mappingend %u\n",close_mappingend_last));
+ }
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
+ knownsplice_limit_high = middle_mappingend_last;
+ }
- donors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- antidonors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- acceptors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- antiacceptors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- donors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- antidonors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- acceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- antiacceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ /* 4 */
+ if (close_mappingstart_p == true) {
+ knownsplice_limit_low = close_knownsplice_limit_low;
+ }
+ if (close_mappingend_p == true) {
+ knownsplice_limit_high = close_knownsplice_limit_high;
+ }
- debug(printf("Starting find_spliceends (plus)\n"));
- find_spliceends_shortend(&donors_plus,&antidonors_plus,&acceptors_plus,&antiacceptors_plus,
- this->plus_segments,this->plus_nsegments,
-#ifdef DEBUG4E
- /*queryptr*/queryuc_ptr,
-#endif
- floors,querylength,query_lastpos,/*query_compress*/query_compress_fwd,
- max_splice_mismatches,/*plusp*/true,genestrand,first_read_p);
- debug(printf("Finished find_spliceends (plus)\n"));
- debug(printf("Starting find_spliceends (minus)\n"));
- find_spliceends_shortend(&antidonors_minus,&donors_minus,&antiacceptors_minus,&acceptors_minus,
- this->minus_segments,this->minus_nsegments,
-#ifdef DEBUG4E
- /*queryptr*/queryrc,
-#endif
- floors,querylength,query_lastpos,/*query_compress*/query_compress_rev,
- max_splice_mismatches,/*plusp*/false,genestrand,first_read_p);
- debug(printf("Finished find_spliceends (minus)\n"));
+ /* F. Make stage2pairs (anchor) */
+ unsorted_pairs = (List_T) NULL;
- shortendsplicing = find_splicepairs_shortend(&found_score,shortendsplicing,
- donors_plus,antidonors_plus,
- acceptors_plus,antiacceptors_plus,
- donors_minus,antidonors_minus,
- acceptors_minus,antiacceptors_minus,
- query_compress_fwd,query_compress_rev,
- queryuc_ptr,queryrc,min_shortend,
- localsplicing_penalty,
- /*max_mismatches_allowed*/max_splice_mismatches,querylength,
- /*pairedp*/false,genestrand,first_read_p);
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
+ debug13(printf("plus anchor diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (anchor_segment->diagonal - chroffset),(unsigned long long) anchor_segment->diagonal,
+ anchor_segment->querypos5,anchor_segment->querypos3,anchor_segment->usedp,anchor_segment->pairablep));
+ querypos = anchor_segment->querypos5;
+ seglength = (anchor_segment->querypos3 + index1part) - querypos;
+
+ left = anchor_segment->diagonal - querylength; /* FORMULA */
+ genomepos = (left - chroffset) + querypos;
+ Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/chroffset+genomepos,
+ seglength,chrhigh,/*revcomp*/false);
+
+ for (i = 0; i < seglength; i++) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ querypos++;
+ genomepos++;
+ }
+
+
+ /* F. Make stage2pairs (left) */
+ sorted = &(sorted_allocated[startk+1]);
+ boundpos = anchor_segment->querypos5;
+ for (k = best_starti; k >= 0; k = prev_left[k]) {
+ segment = sorted[k];
+ debug13(printf("plus left diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
+
+ querypos = segment->querypos5;
+ seglength = (segment->querypos3 + index1part) - querypos;
+
+ left = segment->diagonal - querylength; /* FORMULA */
+ genomepos = (left - chroffset) + querypos;
+ Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*left*/chroffset+genomepos,
+ seglength,chroffset,/*revcomp*/false);
+
+ for (i = 0; i < seglength; i++) {
+ if (querypos < boundpos) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ }
+ querypos++;
+ genomepos++;
+ }
+ boundpos = segment->querypos5;
+ }
+
+ /* F. Make stage2pairs (right) */
+ sorted = &(sorted_allocated[anchork+1]);
+ boundpos = anchor_segment->querypos3 + index1part;
+ for (k = best_endi; k >= 0; k = prev_right[k]) {
+ segment = sorted[k];
+ debug13(printf("plus right diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
+ querypos = segment->querypos5;
+ seglength = (segment->querypos3 + index1part) - querypos;
+
+ left = segment->diagonal - querylength; /* FORMULA */
+ genomepos = left - chroffset + querypos;
+ Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/chroffset+genomepos,
+ seglength,chrhigh,/*revcomp*/false);
+
+ for (i = 0; i < seglength; i++) {
+ if (querypos > boundpos) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ }
+ querypos++;
+ genomepos++;
+ }
+ boundpos = segment->querypos3 + index1part;
}
- debug(printf("8> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
- for (i = 0; i <= max_splice_mismatches; i++) {
- substringlist_gc(&(donors_plus[i]));
- substringlist_gc(&(antidonors_plus[i]));
- substringlist_gc(&(acceptors_plus[i]));
- substringlist_gc(&(antiacceptors_plus[i]));
- substringlist_gc(&(donors_minus[i]));
- substringlist_gc(&(antidonors_minus[i]));
- substringlist_gc(&(acceptors_minus[i]));
- substringlist_gc(&(antiacceptors_minus[i]));
+
+ /* Sort pairs and get unique ones */
+ array = (Pair_T *) List_to_array_n(&npairs,unsorted_pairs);
+ qsort(array,npairs,sizeof(Pair_T),Pair_cmp);
+
+ stage2pairs = (List_T) NULL;
+ i = 0;
+ while (i < npairs) {
+ j = i + 1;
+ while (j < npairs && array[j]->querypos == array[i]->querypos) {
+ j++;
+ }
+ if (j == i + 1) {
+ /* Only a single pair at this querypos */
+ debug13(Pair_dump_one(array[i],true));
+ debug13(printf("\n"));
+ stage2pairs = Pairpool_push_existing(stage2pairs,pairpool,array[i]);
+ }
+ i = j;
+ }
+ stage2pairs = List_reverse(stage2pairs);
+ FREE(array);
+
+
+ /* Run GMAP */
+ if (stage2pairs == NULL) {
+ /* hit = (T) NULL; */
+ } else if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
+ &matches,&nmatches_posttrim,&max_match_length,
+ &ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,
+ &ambig_prob_5,&ambig_prob_3,
+ &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
+ &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
+ stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+ /*query_subseq_offset*/0,
+#else
+ /*query_subseq_offset*/0,
+#endif
+ chrnum,chroffset,chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
+ /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ /*sense_try*/0,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
+ /* hit = (T) NULL; */
+
+ } else {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray,npairs);
+ start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
+ /*minusterm*/Pair_querypos(&(pairarray[0])),chroffset);
+ end = add_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
+ /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chrhigh);
+
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
+ ambig_end_length_5,ambig_end_length_3,
+ ambig_splicetype_5,ambig_splicetype_3,
+ ambig_prob_5,ambig_prob_3,min_splice_prob,
+ pairarray,npairs,nsegments,nintrons,nindelbreaks,
+ /*left*/start,/*genomiclength*/end - start + 1,
+ /*plusp*/true,genestrand,first_read_p,
+ /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
+
+ FREE_OUT(pairarray);
+ } else {
+ hits = List_push(hits,(void *) hit);
+ }
}
- FREEA(donors_plus);
- FREEA(antidonors_plus);
- FREEA(acceptors_plus);
- FREEA(antiacceptors_plus);
- FREEA(donors_minus);
- FREEA(antidonors_minus);
- FREEA(acceptors_minus);
- FREEA(antiacceptors_minus);
}
+ }
+ if (plus_nsegments > 0) {
+#ifdef HAVE_ALLOCA
+ FREEA(gsequence_alt);
+ FREEA(gsequence_orig);
+ if (plus_nsegments < MAX_ALLOCATION) {
+ FREEA(sorted_allocated);
+ FREEA(scores_allocated);
+ FREEA(prev_allocated);
+ } else {
+ FREE(sorted_allocated);
+ FREE(scores_allocated);
+ FREE(prev_allocated);
+ }
+#else
+ FREE(gsequence_alt);
+ FREE(gsequence_orig);
+ FREE(sorted_allocated);
+ FREE(scores_allocated);
+ FREE(prev_allocated);
+#endif
+ }
- if (subs || indels || singlesplicing || doublesplicing || shortendsplicing) {
- /* Don't find distant splicing */
+ return hits;
+}
- } else if (knownsplicingp == false && novelsplicingp == false) {
- /* Don't find distant splicing */
- } else if (done_level < distantsplicing_penalty) {
- /* Want < and not <=, because otherwise distant splicing does not work on 50-bp reads */
- /* Want <= and not <, because distant splicing needs to be better than other alternatives */
- /* Don't find distant splicing */
+static List_T
+convert_minus_segments_to_gmap (History_T gmap_history, List_T hits,
+ char *accession, char *queryuc_ptr, int querylength, int query_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ char *queryrc, bool invertedp,
+#endif
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ List_T anchor_segments, struct Segment_T *minus_segments, int minus_nsegments,
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ int user_maxlevel, int genestrand, bool first_read_p,
+ bool require_pairing_p) {
+ Univcoord_T chroffset, chrhigh, mappingpos;
+ Univcoord_T origlow, orighigh;
+ Univcoord_T close_mappingstart_last, close_mappingend_last,
+ middle_mappingstart_last, middle_mappingend_last;
+ Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+ Univcoord_T close_knownsplice_limit_low, close_knownsplice_limit_high;
+ Chrpos_T chrlength;
+ Chrnum_T chrnum;
+ bool close_mappingstart_p, close_mappingend_p;
+ bool middle_mappingstart_p, middle_mappingend_p;
+ bool novelp; /* Want any of the segments in startk..(endk-1) to not be used */
+ bool pairablep; /* Want any of the segments in startk..(endk-1) to be pairable */
- } else {
- /* 9. Find distant splicing iteratively using both known and novel splice sites */
- max_splice_mismatches = done_level - distantsplicing_penalty;
- debug(printf("*** Stage 9. Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches));
+ List_T p;
+ Segment_T anchor_segment, segment;
+ int anchork, startk, endk, n, i, j, firstj, lastj, k, best_starti, best_endi;
- donors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- antidonors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- acceptors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- antiacceptors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- donors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- antidonors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- acceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
- antiacceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ Stage3end_T hit;
+ Pair_T *array;
+ struct Pair_T *pairarray;
+ List_T pairs, stage2pairs, unsorted_pairs;
+ int querypos, boundpos, seglength;
+ Chrpos_T genomepos;
+ char comp, c, g, g_alt;
+ char *gsequence_orig, *gsequence_alt;
- debug(printf("Starting find_spliceends (plus)\n"));
- find_spliceends_distant(&donors_plus,&antidonors_plus,&acceptors_plus,&antiacceptors_plus,
- this->plus_segments,this->plus_nsegments,
-#ifdef DEBUG4E
- /*queryptr*/queryuc_ptr,
-#endif
- floors,querylength,query_lastpos,/*query_compress*/query_compress_fwd,
- max_splice_mismatches,/*plusp*/true,genestrand,first_read_p);
- debug(printf("Finished find_spliceends (plus)\n"));
+ Segment_T *sorted, *sorted_allocated;
+ int *scores, *scores_allocated, best_score, score;
+ int *prev_left, *prev_right, *prev_allocated, besti;
+
+ int sensedir;
+ int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
+ max_match_length, ambig_end_length_5, ambig_end_length_3,
+ unknowns, mismatches, qopens, qindels, topens, tindels,
+ ncanonical, nsemicanonical, nnoncanonical;
+ double ambig_prob_5, ambig_prob_3, min_splice_prob;
+ Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+ Univcoord_T start, end, left;
+ int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
- debug(printf("Starting find_spliceends (minus)\n"));
- find_spliceends_distant(&antidonors_minus,&donors_minus,&antiacceptors_minus,&acceptors_minus,
- this->minus_segments,this->minus_nsegments,
-#ifdef DEBUG4E
- /*queryptr*/queryrc,
+ if (minus_nsegments > 0) {
+#ifdef HAVE_ALLOCA
+ if (minus_nsegments < MAX_ALLOCATION) {
+ prev_allocated = (int *) ALLOCA(minus_nsegments*sizeof(int));
+ scores_allocated = (int *) ALLOCA(minus_nsegments*sizeof(int));
+ sorted_allocated = (Segment_T *) ALLOCA(minus_nsegments*sizeof(Segment_T));
+ } else {
+ prev_allocated = (int *) MALLOC(minus_nsegments*sizeof(int));
+ scores_allocated = (int *) MALLOC(minus_nsegments*sizeof(int));
+ sorted_allocated = (Segment_T *) MALLOC(minus_nsegments*sizeof(Segment_T));
+ }
+ gsequence_orig = (char *) MALLOCA((querylength+1) * sizeof(char));
+ gsequence_alt = (char *) MALLOCA((querylength+1) * sizeof(char));
+#else
+ prev_allocated = (int *) MALLOC(minus_nsegments*sizeof(int));
+ scores_allocated = (int *) MALLOC(minus_nsegments*sizeof(int));
+ sorted_allocated = (Segment_T *) MALLOC(minus_nsegments*sizeof(Segment_T));
+ gsequence_orig = (char *) MALLOC((querylength+1) * sizeof(char));
+ gsequence_alt = (char *) MALLOC((querylength+1) * sizeof(char));
#endif
- floors,querylength,query_lastpos,/*query_compress*/query_compress_rev,
- max_splice_mismatches,/*plusp*/false,genestrand,first_read_p);
- debug(printf("Finished find_spliceends (minus)\n"));
+ }
+ anchork = 0;
+ for (p = anchor_segments; p != NULL; p = List_next(p)) {
+ anchor_segment = (Segment_T) List_head(p);
+ assert(anchor_segment->diagonal != (Univcoord_T) -1);
+ while (minus_segments[anchork].diagonal != anchor_segment->diagonal) {
+ anchork++;
+ }
- nmismatches = 0;
- ambiguousp = false;
- while (longsinglesplicing == NULL &&
- nmismatches <= done_level - distantsplicing_penalty &&
- nsplicepairs < MAXCHIMERAPATHS && ambiguousp == false) {
- debug(printf("*** Stage 9. Distant splicing, allowing %d mismatches ***\n",nmismatches));
+ startk = anchork - 1;
+ while (startk >= 0 && minus_segments[startk].diagonal != (Univcoord_T) -1 &&
+ minus_segments[startk].diagonal + shortsplicedist > anchor_segment->diagonal) {
+ startk--;
+ }
- debug4e(printf("Sorting splice ends\n"));
- donors_plus[nmismatches] = Substring_sort_chimera_halves(donors_plus[nmismatches],/*ascendingp*/true);
- acceptors_plus[nmismatches] = Substring_sort_chimera_halves(acceptors_plus[nmismatches],/*ascendingp*/true);
+ endk = anchork + 1;
+ while (endk < minus_nsegments && minus_segments[endk].diagonal < anchor_segment->diagonal + shortsplicedist) {
+ endk++;
+ }
+ debug13(printf("%s read: Found minus segments %d to %d inclusive for anchor %d\n",
+ first_read_p ? "First" : "Second",startk+1,endk-1,anchork));
- antidonors_plus[nmismatches] = Substring_sort_chimera_halves(antidonors_plus[nmismatches],/*ascendingp*/false);
- antiacceptors_plus[nmismatches] = Substring_sort_chimera_halves(antiacceptors_plus[nmismatches],/*ascendingp*/false);
- donors_minus[nmismatches] = Substring_sort_chimera_halves(donors_minus[nmismatches],/*ascendingp*/false);
- acceptors_minus[nmismatches] = Substring_sort_chimera_halves(acceptors_minus[nmismatches],/*ascendingp*/false);
+ /* Dynamic programming on left (low) side (querypos3) */
+ if ((n = (anchork - 1) - (startk + 1) + 1) == 0) {
+ best_starti = -1;
+ } else {
+ prev_left = &(prev_allocated[startk+1]);
+ scores = &(scores_allocated[startk+1]);
+ sorted = &(sorted_allocated[startk+1]);
+
+ for (k = startk + 1, i = 0; k < anchork; k++) {
+ sorted[i++] = &(minus_segments[k]);
+ }
+ qsort(sorted,n,sizeof(Segment_T),Segment_querypos3_descending_cmp);
+
+ lastj = 0;
+ while (lastj < n && sorted[lastj]->querypos3 > anchor_segment->querypos3) {
+ lastj++;
+ }
+
+ for (j = 0; j < lastj; j++) {
+ best_score = 0;
+ besti = -1;
+ for (i = 0; i < j; i++) {
+ if (sorted[i]->lowpos >= sorted[j]->lowpos) {
+ /* Skip, since doesn't add nucleotides to left */
+ } else if (sorted[i]->highpos < sorted[j]->lowpos) {
+ if ((score = (sorted[i]->highpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ besti = i;
+ }
+ } else if ((score = (sorted[j]->lowpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ besti = i;
+ }
+ }
+ scores[j] = sorted[j]->highpos - sorted[j]->lowpos;
+ debug13(printf("Best prev is %d with score %d\n",besti,best_score));
+ if ((prev_left[j] = besti) >= 0) {
+ scores[j] += best_score;
+ }
+ }
- antidonors_minus[nmismatches] = Substring_sort_chimera_halves(antidonors_minus[nmismatches],/*ascendingp*/true);
- antiacceptors_minus[nmismatches] = Substring_sort_chimera_halves(antiacceptors_minus[nmismatches],/*ascendingp*/true);
+ /* Anchor segment */
+ best_score = 0;
+ best_starti = -1;
+ for (i = 0; i < lastj; i++) {
+ if (sorted[i]->lowpos >= anchor_segment->lowpos) {
+ /* Skip, since doesn't add nucleotides to left */
+ } else if (sorted[i]->highpos < anchor_segment->lowpos) {
+ if ((score = (sorted[i]->highpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ best_starti = i;
+ }
+ } else if ((score = (anchor_segment->lowpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ best_starti = i;
+ }
+ }
+ }
- debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n",
- nmismatches,
- List_length(donors_plus[nmismatches]),List_length(acceptors_plus[nmismatches]),
- List_length(antidonors_plus[nmismatches]),List_length(antiacceptors_plus[nmismatches]),
- List_length(donors_minus[nmismatches]),List_length(acceptors_minus[nmismatches]),
- List_length(antidonors_minus[nmismatches]),List_length(antiacceptors_minus[nmismatches])));
- distantsplicing = find_splicepairs_distant(&found_score,&nsplicepairs,&longsinglesplicing,distantsplicing,
- donors_plus,antidonors_plus,acceptors_plus,antiacceptors_plus,
- donors_minus,antidonors_minus,acceptors_minus,antiacceptors_minus,
- localsplicing_penalty,distantsplicing_penalty,
- querylength,nmismatches,first_read_p);
-#if 0
- assert(List_length(distantsplicing) <= 1);
-#endif
+ /* Dynamic programming on right (high) side (querypos5) */
+ if ((n = (endk - 1) - (anchork + 1) + 1) == 0) {
+ best_endi = -1;
+ } else {
+ prev_right = &(prev_allocated[anchork+1]);
+ scores = &(scores_allocated[anchork+1]);
+ sorted = &(sorted_allocated[anchork+1]);
+
+ for (k = anchork + 1, i = 0; k < endk; k++) {
+ sorted[i++] = &(minus_segments[k]);
+ }
+ qsort(sorted,n,sizeof(Segment_T),Segment_querypos5_descending_cmp);
+
+ firstj = n - 1;
+ while (firstj >= 0 && sorted[firstj]->querypos5 < anchor_segment->querypos5) {
+ firstj--;
+ }
+
+ for (j = n - 1; j > firstj; j--) {
+ best_score = 0;
+ besti = -1;
+ for (i = n - 1; i > j; i--) {
+ if (sorted[i]->highpos <= sorted[i]->highpos) {
+ /* Skip, since doesn't add nucleotides to right */
+ } else if (sorted[i]->lowpos > sorted[j]->highpos) {
+ if ((score = (sorted[i]->highpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ besti = i;
+ }
+ } else if ((score = (sorted[i]->highpos - sorted[j]->highpos)) > best_score) {
+ best_score = score;
+ besti = i;
+ }
+ }
+ scores[j] = sorted[j]->highpos - sorted[j]->lowpos;
+ debug13(printf("Best prev is %d with score %d\n",besti,best_score));
+ if ((prev_right[j] = besti) >= 0) {
+ scores[j] += best_score;
+ }
+ }
-#if 0
- /* Mark ambiguous splices only for single-end reads */
- distantsplicing = Stage3end_mark_ambiguous_splices(&ambiguousp,distantsplicing);
-#endif
+ /* Anchor segment */
+ best_score = 0;
+ best_endi = -1;
+ for (i = n - 1; i > firstj; i--) {
+ if (sorted[i]->highpos <= anchor_segment->highpos) {
+ /* Skip, since doesn't add nucleotides to right */
+ } else if (sorted[i]->lowpos > anchor_segment->highpos) {
+ if ((score = (sorted[i]->highpos - sorted[i]->lowpos)) > best_score) {
+ best_score = score;
+ best_endi = i;
+ }
+ } else if ((score = (sorted[i]->highpos - anchor_segment->highpos)) > best_score) {
+ best_score = score;
+ best_endi = i;
+ }
+ }
+ }
- /* Excess distant splicing should be freed already in find_splicepairs_distant */
- debug(printf("Entering Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
- distantsplicing = Stage3end_optimal_score(distantsplicing,opt_level,subopt_levels,
- query_compress_fwd,query_compress_rev,querylength,
- /*keep_gmap_p*/true,/*finalp*/false);
- debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
+ /* Evaluate set of segments */
+ novelp = pairablep = false;
+ if (anchor_segment->usedp == false) {
+ novelp = true;
+ }
+ if (anchor_segment->pairablep == true) {
+ pairablep = true;
+ }
- if (distantsplicing) {
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
+ sorted = &(sorted_allocated[startk+1]);
+ for (k = best_starti; k >= 0; k = prev_left[k]) {
+ if (sorted[k]->usedp == false) {
+ novelp = true;
+ }
+ if (sorted[k]->pairablep == true) {
+ pairablep = true;
+ }
+ }
+
+ sorted = &(sorted_allocated[anchork+1]);
+ for (k = best_endi; k >= 0; k = prev_right[k]) {
+ if (sorted[k]->usedp == false) {
+ novelp = true;
+ }
+ if (sorted[k]->pairablep == true) {
+ pairablep = true;
+ }
+ }
+
+
+ debug13(printf("%s read: Processing minus segments %d to %d inclusive: novelp %d, pairablep %d\n",
+ first_read_p ? "First" : "Second",startk+1,endk-1,novelp,pairablep));
+ if (novelp == true && (pairablep == true || require_pairing_p == false)) {
+ anchor_segment->usedp = true;
+ chrnum = anchor_segment->chrnum;
+ chroffset = anchor_segment->chroffset;
+ chrhigh = anchor_segment->chrhigh;
+ chrlength = anchor_segment->chrlength;
+
+ left = anchor_segment->diagonal - querylength; /* FORMULA */
+ origlow = left - (querylength - anchor_segment->querypos3);
+ orighigh = left + anchor_segment->querypos5;
+
+ /* extend right */
+ knownsplice_limit_low = subtract_bounded(origlow,shortsplicedist,chroffset);
+ debug13(printf("Original bounds C: knownsplice_limit_low %u\n",knownsplice_limit_low - chroffset));
+
+ /* extend left */
+ knownsplice_limit_high = add_bounded(orighigh,shortsplicedist,chrhigh);
+ debug13(printf("Original bounds D: knownsplice_limit_high %u\n",knownsplice_limit_high - chroffset));
+
+ close_mappingstart_last = middle_mappingstart_last = origlow;
+ close_mappingend_last = middle_mappingend_last = orighigh;
+ close_mappingstart_p = close_mappingend_p = false;
+ middle_mappingstart_p = middle_mappingend_p = false;
+
+ /* 1 */
+ sorted = &(sorted_allocated[startk+1]);
+ for (k = best_starti; k >= 0; k = prev_left[k]) {
+ segment = sorted[k];
+ segment->usedp = true;
+ debug13(printf("1. minus diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
+ if (query_lastpos - segment->querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 2. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,segment->querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(segment->diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+
+ } else {
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,segment->querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(segment->diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
}
- debug(printf("9> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
}
- nmismatches++;
+
+ if (segment->querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 4. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ segment->querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = add_bounded(segment->diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
+ }
+
+ } else {
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ segment->querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = segment->diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
+ }
}
- if (longsinglesplicing != NULL) {
- debug(printf("Entering Stage3end_optimal_score with %d longsinglesplicing hits\n",List_length(longsinglesplicing)));
- longsinglesplicing = Stage3end_optimal_score(longsinglesplicing,opt_level,subopt_levels,
- query_compress_fwd,query_compress_rev,querylength,
- /*keep_gmap_p*/true,/*finalp*/false);
- debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(longsinglesplicing)));
+ sorted = &(sorted_allocated[anchork+1]);
+ for (k = best_endi; k >= 0; k = prev_right[k]) {
+ segment = sorted[k];
+ segment->usedp = true;
+ debug13(printf("1. minus diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
+ if (query_lastpos - segment->querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 2. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,segment->querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(segment->diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+
+ } else {
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,segment->querypos3,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = subtract_bounded(segment->diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
+ }
+ }
+
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
+ if (segment->querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 4. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ segment->querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = add_bounded(segment->diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
+ }
+
+ } else {
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ segment->querypos5,STAGE2_MIN_OLIGO,index1interval));
+ if ((mappingpos = segment->diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
}
- debug(printf("9> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+ }
+
+ /* 2 */
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_last;
+ close_mappingstart_last = middle_mappingstart_last;
+ close_mappingstart_p = true;
+ }
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ }
+
+ /* 3 */
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_last;
+ close_mappingend_last = middle_mappingend_last;
+ close_mappingend_p = true;
+ }
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
+ knownsplice_limit_high = middle_mappingend_last;
}
- for (i = 0; i <= max_splice_mismatches; i++) {
- substringlist_gc(&(donors_plus[i]));
- substringlist_gc(&(antidonors_plus[i]));
- substringlist_gc(&(acceptors_plus[i]));
- substringlist_gc(&(antiacceptors_plus[i]));
- substringlist_gc(&(donors_minus[i]));
- substringlist_gc(&(antidonors_minus[i]));
- substringlist_gc(&(acceptors_minus[i]));
- substringlist_gc(&(antiacceptors_minus[i]));
+ /* 4 */
+ if (close_mappingstart_p == true) {
+ knownsplice_limit_low = close_knownsplice_limit_low;
+ }
+ if (close_mappingend_p == true) {
+ knownsplice_limit_high = close_knownsplice_limit_high;
}
- FREEA(donors_plus);
- FREEA(antidonors_plus);
- FREEA(acceptors_plus);
- FREEA(antiacceptors_plus);
- FREEA(donors_minus);
- FREEA(antidonors_minus);
- FREEA(acceptors_minus);
- FREEA(antiacceptors_minus);
- }
- debug(printf("%d single splices, %d double splices, %d short-end splices, %d long single splices, %d distant splices\n",
- List_length(singlesplicing),List_length(doublesplicing),
- List_length(shortendsplicing),List_length(longsinglesplicing),
- List_length(distantsplicing)));
- }
- debug(printf("Before terminals:\n"));
- debug(printf(" subs: %d\n",List_length(subs)));
- debug(printf(" indels: %d\n",List_length(indels)));
- debug(printf(" singlesplicing %d\n",List_length(singlesplicing)));
- debug(printf(" doublesplicing %d\n",List_length(doublesplicing)));
- debug(printf(" shortendsplicing: %d\n",List_length(shortendsplicing)));
- debug(printf(" longsinglesplicing %d\n",List_length(longsinglesplicing)));
- debug(printf(" distantsplicing: %d\n",List_length(distantsplicing)));
- debug(printf(" done_level: %d\n",done_level));
- /* 9. GMAP indels_plus_knownsplicing */
- if (indels != NULL && gmap_indel_knownsplice_p == true) {
- debug13(printf("GMAP indels plus known splicing: %d indels\n",List_length(indels)));
- new_indels = (List_T) NULL;
- for (p = indels; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- if (Stage3end_indel_contains_known_splicesite(&extend_left_p,&extend_right_p,hit) == false) {
- new_indels = List_push(new_indels,(void *) hit);
- } else {
- gmap_hits = align_single_hit_with_gmap(gmap_history,hit,extend_left_p,extend_right_p,
- queryuc_ptr,querylength,query_lastpos,
-#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc,Shortread_invertedp(queryseq),
-#endif
- query_compress_fwd,query_compress_rev,
- this->plus_segments,this->plus_nsegments,this->minus_segments,this->minus_nsegments,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,genestrand,first_read_p);
-
- gmap_better_p = false;
- missing_hit = querylength - Stage3end_nmatches_posttrim(hit);
- for (a = gmap_hits; a != NULL; a = List_next(a)) {
- gmap = (Stage3end_T) List_head(a);
- missing_gmap = querylength - Stage3end_nmatches_posttrim(gmap);
- if (Stage3end_score(gmap) > opt_level + gmap_allowance) {
- debug13(printf("Score is only %d vs opt_level %d\n",Stage3end_score(gmap),opt_level));
- Stage3end_free(&gmap);
- } else if (missing_gmap < missing_hit/2) {
- debug13(printf("GMAP with %d matches, %d missing is significantly better than indel with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
- good_gmap_hits = List_push(good_gmap_hits,(void *) gmap);
- gmap_better_p = true;
- Stage3end_set_improved_by_gmap(hit);
- } else {
- debug13(printf("GMAP with %d matches, %d missing is not significantly better than indel with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
- Stage3end_free(&gmap);
+ /* F. Make stage2pairs (anchor) */
+ unsorted_pairs = (List_T) NULL;
+
+ debug13(printf("minus anchor diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (anchor_segment->diagonal - chroffset),(unsigned long long) anchor_segment->diagonal,
+ anchor_segment->querypos5,anchor_segment->querypos3,anchor_segment->usedp,anchor_segment->pairablep));
+ querypos = anchor_segment->querypos5;
+ seglength = (anchor_segment->querypos3 + index1part) - querypos;
+
+ /* left = anchor_segment->diagonal - querylength; -- FORMULA */
+ genomepos = chrhigh - (anchor_segment->diagonal - 1) + querypos;
+ Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/anchor_segment->diagonal - querypos - seglength,
+ seglength,chrhigh,/*revcomp*/true);
+
+ for (i = 0; i < seglength; i++) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ querypos++;
+ genomepos++;
+ }
+
+ /* F. Make stage2pairs (left) */
+ sorted = &(sorted_allocated[startk+1]);
+ boundpos = anchor_segment->querypos3 + index1part;
+ for (k = best_starti; k >= 0; k = prev_left[k]) {
+ segment = sorted[k];
+ debug13(printf("minus left diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
+ querypos = segment->querypos5;
+ seglength = (segment->querypos3 + index1part) - querypos;
+
+ /* left = segment->diagonal - querylength; -- FORMULA */
+ genomepos = chrhigh - (segment->diagonal - 1) + querypos;
+ Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*left*/segment->diagonal - querypos - seglength,
+ seglength,chroffset,/*revcomp*/true);
+
+ for (i = 0; i < seglength; i++) {
+ if (querypos > boundpos) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ }
+ querypos++;
+ genomepos++;
+ }
+ boundpos = segment->querypos3 + index1part;
+ }
+
+ /* F. Make stage2pairs (right) */
+ sorted = &(sorted_allocated[anchork+1]);
+ boundpos = anchor_segment->querypos5;
+ for (k = best_endi; k >= 0; k = prev_right[k]) {
+ segment = sorted[k];
+ debug13(printf("minus right diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
+ (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
+ segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
+ querypos = segment->querypos5;
+ seglength = (segment->querypos3 + index1part) - querypos;
+
+ /* left = segment->diagonal - querylength; -- FORMULA */
+ genomepos = chrhigh - (segment->diagonal - 1) + querypos;
+ Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/segment->diagonal - querypos - seglength,
+ seglength,chrhigh,/*revcomp*/true);
+
+ for (i = 0; i < seglength; i++) {
+ if (querypos < boundpos) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
}
+ querypos++;
+ genomepos++;
}
- List_free(&gmap_hits);
+ boundpos = segment->querypos5;
+ }
+
+ /* Sort pairs and get unique ones */
+ array = (Pair_T *) List_to_array_n(&npairs,unsorted_pairs);
+ qsort(array,npairs,sizeof(Pair_T),Pair_cmp);
+
+ stage2pairs = (List_T) NULL;
+ i = 0;
+ while (i < npairs) {
+ j = i + 1;
+ while (j < npairs && array[j]->querypos == array[i]->querypos) {
+ j++;
+ }
+ if (j == i + 1) {
+ /* Only a single pair at this querypos */
+ debug13(Pair_dump_one(array[i],true));
+ debug13(printf("\n"));
+ stage2pairs = Pairpool_push_existing(stage2pairs,pairpool,array[i]);
+ }
+ i = j;
+ }
+ stage2pairs = List_reverse(stage2pairs);
+ FREE(array);
+
- if (gmap_better_p == true) {
- Stage3end_free(&hit);
+ /* Run GMAP */
+ if (stage2pairs == NULL) {
+ /* hit = (T) NULL; */
+
+ } else if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
+ &matches,&nmatches_posttrim,&max_match_length,
+ &ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,
+ &ambig_prob_5,&ambig_prob_3,
+ &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
+ &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
+ stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+ /*query_subseq_offset*/0,
+#else
+ /*query_subseq_offset*/0,
+#endif
+ chrnum,chroffset,chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
+ /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ /*sense_try*/0,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
+ /* hit = (T) NULL; */
+
+ } else {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray,npairs);
+ start = add_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
+ /*plusterm*/Pair_querypos(&(pairarray[0])),chrhigh);
+ end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chroffset);
+
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
+ ambig_end_length_5,ambig_end_length_3,
+ ambig_splicetype_5,ambig_splicetype_3,
+ ambig_prob_5,ambig_prob_3,min_splice_prob,
+ pairarray,npairs,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,first_read_p,
+ /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SEGMENTS)) == NULL) {
+ FREE_OUT(pairarray);
} else {
- new_indels = List_push(new_indels,(void *) hit);
+ hits = List_push(hits,(void *) hit);
}
}
}
- List_free(&indels);
- indels = new_indels;
}
-
- /* 10. Terminals */
- /* Previously did not find terminals if (subs || indels || good_gmap_hits || singlesplicing || doublesplicing || shortendsplicing || distantsplicing) */
-
- if (done_level >= terminal_threshold) {
- max_mismatches_allowed = done_level;
- debug(printf("*** Stage 10. Terminals up to %d mismatches ***\n",max_mismatches_allowed));
- if (floors_computed_p == false) {
- floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
- plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
- /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+ if (minus_nsegments > 0) {
+#ifdef HAVE_ALLOCA
+ FREEA(gsequence_alt);
+ FREEA(gsequence_orig);
+ if (minus_nsegments < MAX_ALLOCATION) {
+ FREEA(sorted_allocated);
+ FREEA(scores_allocated);
+ FREEA(prev_allocated);
+ } else {
+ FREE(sorted_allocated);
+ FREE(scores_allocated);
+ FREE(prev_allocated);
}
-
- if (segments_computed_p == false) {
- this->plus_segments = identify_all_segments_for_terminals(&this->plus_nsegments,
-#ifdef LARGE_GENOMES
- this->plus_positions_high,this->plus_positions_low,
-#else
- this->plus_positions,
-#endif
- this->plus_npositions,this->omitted,querylength,query_lastpos,
- floors,max_mismatches_allowed,/*plusp*/true);
- this->minus_segments = identify_all_segments_for_terminals(&this->minus_nsegments,
-#ifdef LARGE_GENOMES
- this->minus_positions_high,this->minus_positions_low,
#else
- this->minus_positions,
-#endif
- this->minus_npositions,this->omitted,querylength,query_lastpos,
- floors,max_mismatches_allowed,/*plusp*/false);
- }
-
- terminals = find_terminals(this->plus_segments,this->plus_nsegments,this->minus_segments,this->minus_nsegments,
-#ifdef DEBUG4T
- queryuc_ptr,queryrc,
-#endif
- floors,querylength,query_lastpos,
- query_compress_fwd,query_compress_rev,
- max_mismatches_allowed,genestrand,first_read_p);
-#if 0
- opt_level = (found_score < opt_level) ? found_score : opt_level;
- if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
- done_level = user_maxlevel;
- }
- debug(printf("10> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+ FREE(gsequence_alt);
+ FREE(gsequence_orig);
+ FREE(sorted_allocated);
+ FREE(scores_allocated);
+ FREE(prev_allocated);
#endif
}
+ return hits;
+}
- /* This step does make sense, in contrast with paired-end alignment,
- where terminals should get improved by GMAP improvement */
- debug13(printf("%d terminals (vs max_gmap_terminal %d)\n",List_length(terminals),max_gmap_terminal));
- if (terminals != NULL && gmap_terminal_p == true) {
- /* 11. GMAP terminal */
+static List_T
+align_singleend_with_gmap (History_T gmap_history, List_T result, T this,
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ char *accession, char *queryuc_ptr, int querylength, int query_lastpos,
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ int user_maxlevel, int cutoff_level, bool first_read_p) {
+ List_T new_result = NULL, gmap_hits = NULL;
+ Stage3end_T hit, gmap;
+ List_T p, a;
+ int genestrand;
+ int missing_hit, missing_gmap;
+ int i;
+
+
+ debug13(printf("Sorting hits by nmatches\n"));
+ result = Stage3end_sort_bymatches(result);
+
+ for (p = result, i = 0; p != NULL && i < max_gmap_improvement; p = p->rest, i++) {
+ hit = (Stage3end_T) List_head(p);
+ genestrand = Stage3end_genestrand(hit);
+
+ debug13(printf("GMAP improvement: Entering align_singleend_with_gmap with hittype %s\n",
+ Stage3end_hittype_string(hit)));
+
+ /* Was querylength5 - Stage3end_matches(hit5) > 5 */
+ if (Stage3end_hittype(hit) == GMAP) {
+ /* Skip */
+ debug13(printf("Skipping hit of type GMAP\n"));
+ new_result = List_push(new_result,(void *) hit);
+
+ } else if (Stage3end_improved_by_gmap_p(hit) == true) {
+ /* Skip */
+ debug13(printf("Skipping hit already improved by GMAP\n"));
+ new_result = List_push(new_result,(void *) hit);
+
#if 0
- /* This is done for paired-ends, but should not be necessary for single-end */
- debug13(printf("Before remove overlaps at cutoff level %d: %d hits\n",opt_level,List_length(terminals)));
- terminals = Stage3end_sort_bymatches(Stage3end_remove_overlaps(terminals,/*finalp*/false));
- debug13(printf("After remove overlaps: %d\n",List_length(terminals)));
-#endif
-
- if (List_length(terminals) <= max_gmap_terminal) {
- debug13(printf("%d hits\n",List_length(terminals)));
- debug13(printf("For terminals, running GMAP on single end to match with terminal\n"));
-
- for (p = terminals; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- /* Was align_single_terminal_with_gmap() */
- gmap_hits = align_single_hit_with_gmap(gmap_history,hit,
- /*extend_left_p*/Stage3end_start_endtype(hit) == TERM,
- /*extend_right_p*/Stage3end_end_endtype(hit) == TERM,
- queryuc_ptr,querylength,query_lastpos,
+ /* Don't skip on final align_singleend_with_gmap */
+ } else if (Stage3end_hittype(hit) == TERMINAL) {
+ /* Skip */
+ debug13(printf("Skipping hit of type TERMINAL\n"));
+ new_result = List_push(new_result,(void *) hit);
+#endif
+
+ } else if (querylength - Stage3end_nmatches_posttrim(hit) <= user_maxlevel) {
+ /* Skip */
+ debug13(printf("Skipping hit with nmismatches %d - %d <= user_maxlevel %d\n",
+ querylength,Stage3end_nmatches_posttrim(hit),user_maxlevel));
+ new_result = List_push(new_result,(void *) hit);
+
+ } else if (Stage3end_terminal_trim(hit) <= GMAP_TERMINAL_TRIM
+ && Stage3end_contains_known_splicesite(hit) == false
+ ) {
+ debug13(printf("Skipping good hit\n"));
+ new_result = List_push(new_result,(void *) hit);
+
+ } else {
+ debug13(printf("To correct hit terminalp %d or known_splicesite %d, running GMAP on 5' to match with 3' end\n",
+ Stage3end_hittype(hit) == TERMINAL,
+ Stage3end_contains_known_splicesite(hit)));
+
+ /* Want high quality because we already have a pretty good answer */
+ if ((gmap = align_single_hit_with_gmap(hit,queryuc_ptr,querylength,
#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc,Shortread_invertedp(queryseq),
+ queryrc,Shortread_invertedp(queryseq),
#endif
- query_compress_fwd,query_compress_rev,
- this->plus_segments,this->plus_nsegments,this->minus_segments,this->minus_nsegments,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,genestrand,first_read_p);
-
- missing_hit = querylength - Stage3end_nmatches_posttrim(hit);
- for (a = gmap_hits; a != NULL; a = List_next(a)) {
- gmap = (Stage3end_T) List_head(a);
- missing_gmap = querylength - Stage3end_nmatches_posttrim(gmap);
- if (Stage3end_score(gmap) > opt_level + gmap_allowance) {
- debug13(printf("Score is only %d vs opt_level %d\n",Stage3end_score(gmap),opt_level));
- Stage3end_free(&gmap);
- } else if (missing_gmap < missing_hit/2) {
- debug13(printf("GMAP with %d matches, %d missing is significantly better than terminal with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
- good_gmap_hits = List_push(good_gmap_hits,(void *) gmap);
- Stage3end_set_improved_by_gmap(hit);
- } else {
- debug13(printf("GMAP with %d matches, %d missing is not significantly better than terminal with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
- Stage3end_free(&gmap);
- }
- }
- List_free(&gmap_hits);
+ oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ genestrand,first_read_p)) != NULL) {
+ debug13(missing_hit = querylength - Stage3end_nmatches_posttrim(hit));
+ debug13(missing_gmap = querylength - Stage3end_nmatches_posttrim(gmap));
+ debug13(printf("GMAP %p with %d matches, %d missing compared with original hit with %d matches, %d missing\n",
+ gmap,Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
+ new_result = List_push(new_result,(void *) gmap);
+ Stage3end_set_improved_by_gmap(hit);
+ } else {
+ new_result = List_push(new_result,(void *) hit);
}
}
}
- debug13(printf("Have %d good GMAP hits\n",List_length(good_gmap_hits)));
+
+ for ( ; p != NULL; p = p->rest) {
+ hit = (Stage3end_T) List_head(p);
+ new_result = List_push(new_result,(void *) hit);
+ }
+
+ List_free(&result);
+ return new_result;
+}
- if (alloc_floors_p == true) {
- Floors_free(&floors);
- }
+/* Search order for single-end reads:
- /* Keep good_gmap_hits found in step 10 */
- hits = List_append(subs,
- List_append(indels,
- List_append(singlesplicing,
- List_append(longsinglesplicing,
- List_append(doublesplicing,
- List_append(shortendsplicing,
- List_append(distantsplicing,
- List_append(good_gmap_hits,terminals))))))));
+ 1. suffix array
+ 2. exact/subs, via spanning set algorithm
+ 3. subs/indels, via complete set algorithm
+ 4. segments -> single splicing
+ 5. segments -> double splicing (currently disabled)
- if (gmap_improvement_p == false) {
- debug(printf("No GMAP improvement: Before remove_overlaps at cutoff level %d: %d\n",*cutoff_level,List_length(hits)));
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/true,/*finalp*/true);
- hits = Stage3end_reject_trimlengths(hits);
- hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/false,/*finalp*/true);
- hits = Stage3end_resolve_multimapping(hits);
- debug(printf("After remove_overlaps: %d\n",List_length(hits)));
+ (6). paired segments -> GMAP via segments (not applicable for single-end reads)
+ 7. distant splicing (needs to be before terminals, or we won't find them)
+ 8. terminals
+
+ (9). if still no concordance: GMAP pairsearch (not applicable for single-end reads)
+ 9. if found score is low: anchor segments -> GMAP via segments
+
+ 10. GMAP improvement
+*/
+
+
+/* done_level should probably be renamed final_level. opt_level
+ should probably be renamed found_level. */
+static List_T
+align_end (int *cutoff_level, History_T gmap_history, T this,
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ char *accession, char *queryuc_ptr, char *queryrc, int querylength, int query_lastpos,
+ Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold, Floors_T *floors_array,
+
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+
+ int user_maxlevel, int indel_penalty_middle, int indel_penalty_end,
+ int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
+ bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
+ bool allvalidp, bool keep_floors_p, int genestrand, bool first_read_p) {
+ List_T hits, greedy = NULL, subs = NULL, terminals = NULL, indels = NULL, new_indels,
+ ambiguous = NULL, singlesplicing = NULL, doublesplicing = NULL, shortendsplicing = NULL,
+ longsinglesplicing = NULL, distantsplicing = NULL, gmap_hits = NULL;
+ List_T plus_anchor_segments = NULL, minus_anchor_segments = NULL;
+ List_T p, a;
+ Stage3end_T hit, gmap;
+ int nmisses_allowed_sarray;
+ int found_score, done_level, opt_level, fast_level, mismatch_level, nmismatches, max_mismatches_allowed;
+ int max_splice_mismatches, i;
+ int missing_hit, missing_gmap;
+ int nhits = 0, nsplicepairs = 0;
+ List_T *startfrags_plus, *endfrags_plus, *startfrags_minus, *endfrags_minus;
+ List_T *donors_plus, *antidonors_plus, *acceptors_plus, *antiacceptors_plus,
+ *donors_minus, *antidonors_minus, *acceptors_minus, *antiacceptors_minus;
+ bool any_omitted_p, ambiguousp, alloc_floors_p = false, floors_computed_p = false;
+ Floors_T floors;
+ bool spanningsetp, completesetp, gmapp;
+ bool segments_computed_p = false, gmap_better_p, extend_left_p, extend_right_p;
+ Indexdb_T plus_indexdb, minus_indexdb;
+ if (genestrand == +2) {
+ plus_indexdb = indexdb_rev;
+ minus_indexdb = indexdb_fwd;
} else {
- debug(printf("GMAP improvement: Before remove_overlaps at cutoff level %d: %d\n",*cutoff_level,List_length(hits)));
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/true,/*finalp*/false);
- /* Don't reject based on trimlength until after GMAP improvements */
- hits = Stage3end_remove_overlaps(hits,/*finalp*/false);
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/false,/*finalp*/false);
- hits = Stage3end_resolve_multimapping(hits);
- debug(printf("After remove_overlaps: %d\n",List_length(hits)));
-
- hits = align_singleend_with_gmap(gmap_history,hits,this,query_compress_fwd,query_compress_rev,
- queryuc_ptr,querylength,query_lastpos,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel,*cutoff_level,
- first_read_p);
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/true,/*finalp*/true);
- hits = Stage3end_reject_trimlengths(hits);
+ plus_indexdb = indexdb_fwd;
+ minus_indexdb = indexdb_rev;
+ }
+
+ found_score = querylength;
+ fast_level = (querylength + index1interval - 1)/spansize - NREQUIRED_FAST;
+ debug(printf("fast_level %d = (querylength %d + index1interval %d - 1)/spansize %d - nrequired_fast %d\n",
+ fast_level,querylength,index1interval,spansize,NREQUIRED_FAST));
+
+#if 0
+ /* This prevents complete_mm procedure, needed for short reads */
+ if (fast_level < 1 && user_maxlevel < 0) {
+ debug(printf("Changing fast_level to 0\n"));
+ fast_level = 1; /* Do at least 1 mismatch */
+ }
+#endif
+
+ if (user_maxlevel >= 0) {
+ *cutoff_level = user_maxlevel;
+ } else if (fast_level >= 0) {
+ *cutoff_level = fast_level;
+ } else {
+ *cutoff_level = 0;
+ }
+ debug(printf("cutoff_level = %d\n",*cutoff_level));
+
+ if (user_maxlevel < 0) {
+ if (fast_level >= 0) {
+ user_maxlevel = fast_level;
+ } else {
+ user_maxlevel = 0;
+ }
+ }
+ debug(printf("user_maxlevel = %d\n",user_maxlevel));
+
+#if 0
+ if (dibasep) {
+ opt_level = querylength; /* Allow extra because color errors may exceed nt errors */
+ }
+#endif
+ opt_level = user_maxlevel;
+ done_level = user_maxlevel /* + subopt_levels. -- Initially the same */;
+ debug(printf("0> opt_level %d, done_level %d\n",opt_level,done_level));
+
+ nhits = 0;
+
+ nmisses_allowed_sarray = *cutoff_level;
+
+#ifndef LARGE_GENOMES
+ if (use_only_sarray_p == true) {
+ hits = Sarray_search_greedy(&(*cutoff_level),
+ queryuc_ptr,queryrc,querylength,query_compress_fwd,query_compress_rev,maxpeelback,pairpool,
+ dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
+ nmisses_allowed_sarray,genestrand,first_read_p);
+
hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
querylength,/*keep_gmap_p*/false,/*finalp*/true);
hits = Stage3end_resolve_multimapping(hits);
+
+ hits = Stage3end_remove_circular_alias(hits);
+ hits = Stage3end_remove_duplicates(hits); /* Aliases can cause duplicates */
+
+ return hits;
}
+#endif
+
- hits = Stage3end_remove_circular_alias(hits);
- hits = Stage3end_remove_duplicates(hits); /* Aliases can cause duplicates */
-
- return hits;
-}
-
-
-static Stage3end_T *
-single_read (int *npaths, int *first_absmq, int *second_absmq,
- Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
- int indexdb_size_threshold, Genome_T genome, Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
- bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
- int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- bool keep_floors_p) {
- Stage3end_T *stage3array;
- History_T gmap_history;
- List_T hits = NULL;
- T this = NULL;
- int user_maxlevel;
- int querylength, query_lastpos, cutoff_level;
- char *queryuc_ptr, *quality_string;
- Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
- bool allvalidp;
-
-#ifdef HAVE_ALLOCA
- char *queryrc;
+ /* Search 1: Suffix array */
+ completesetp = true;
+#ifdef LARGE_GENOMES
+ spanningsetp = true;
#else
- char queryrc[MAX_READLENGTH+1];
+ if (use_sarray_p == false) {
+ spanningsetp = true;
+ } else {
+ spanningsetp = false; /* Suffix array search replaces spanning set */
+
+ debug(printf("Trying suffix array\n"));
+ greedy = Sarray_search_greedy(&found_score,
+ queryuc_ptr,queryrc,querylength,query_compress_fwd,query_compress_rev,maxpeelback,pairpool,
+ dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
+ nmisses_allowed_sarray,genestrand,first_read_p);
+
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ debug(printf("SA> found_score %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+
+ }
#endif
+
- if ((querylength = Shortread_fulllength(queryseq)) < min_readlength) {
- fprintf(stderr,"Read %s has length %d < min_readlength %d. Skipping.\n",
- Shortread_accession(queryseq),querylength,min_readlength);
- /* fprintf(stderr,"You may want to build a genomic index with a smaller k-mer value using the -k flag to gmap_build\n"); */
- *npaths = 0;
- return (Stage3end_T *) NULL;
+ /* Search 2: Exact/subs via spanning set */
-#ifndef HAVE_ALLOCA
- } else if (querylength > MAX_READLENGTH) {
- fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
- Shortread_accession(queryseq),querylength,MAX_READLENGTH);
- *npaths = 0;
- return (Stage3end_T *) NULL;
-#endif
+ if (spanningsetp == true) {
+ read_oligos(&allvalidp,this,queryuc_ptr,querylength,query_lastpos,/*genestrand*/0,
+ /*first_read_p*/true);
- } else {
- if (user_maxlevel_float < 0.0) {
- user_maxlevel = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel = (int) rint(user_maxlevel_float * (double) querylength);
+ /* 1. Exact. Requires compress if cmet or genomealt. Creates and uses spanning set. */
+ mismatch_level = 0;
+ if (done_level == 0 && snpp == false) {
+ debug(printf("Suffix array already found exact matches and no SNPs, so spanning set can't do any better\n"));
+ } else if (allvalidp == false) {
+ debug(printf("Not all oligos are valid, so cannot perform spanning set\n"));
+ fast_level = -1;
+ spanningsetp = false;
} else {
- user_maxlevel = (int) user_maxlevel_float;
+ debug(printf("fast_level = %d\n",fast_level));
+ debug(printf("*** Stage 1. Exact ***\n"));
+ subs = find_spanning_exact_matches(&found_score,&nhits,/*hits*/NULL,this,genestrand,first_read_p,
+ querylength,query_lastpos,plus_indexdb,minus_indexdb,
+ query_compress_fwd,query_compress_rev);
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ mismatch_level = 1;
+ debug(printf("1> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
}
- /* Limit search on repetitive sequences */
- queryuc_ptr = Shortread_fullpointer_uc(queryseq);
- quality_string = Shortread_quality_string(queryseq);
- if (check_dinucleotides(queryuc_ptr,querylength) == false) {
- user_maxlevel = 0;
+ /* 2. One mismatch. Requires spanning set and compress. */
+ if (allvalidp && querylength >= one_miss_querylength && done_level >= 1) {
+ debug(printf("*** Stage 2. One miss ***\n"));
+ subs = find_spanning_onemiss_matches(&found_score,&nhits,subs,this,genestrand,first_read_p,
+ querylength,query_compress_fwd,query_compress_rev);
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ mismatch_level = 2;
+ debug(printf("2> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
}
- query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
- query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
-#ifdef HAVE_ALLOCA
- queryrc = (char *) ALLOCA((querylength+1)*sizeof(int));
-#endif
- make_complement_buffered(queryrc,queryuc_ptr,querylength);
-
- this = Stage1_new(querylength);
- query_lastpos = querylength - index1part;
+ /* 3. Mismatches via spanning set. Requires spanning set and compress. */
+ if (allvalidp && done_level >= 2) {
+ while (mismatch_level <= fast_level && mismatch_level <= done_level) {
+ debug(printf("*** Stage 3 (level %d). Spanning set mismatches ***\n",mismatch_level));
+ subs = find_spanning_multimiss_matches(&found_score,&nhits,subs,this,genestrand,first_read_p,
+ NREQUIRED_FAST,querylength,query_compress_fwd,query_compress_rev,
+ /*nmisses_allowed*/mismatch_level);
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ mismatch_level++;
+ debug(printf("3> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+ }
+ }
+ }
- if (read_oligos(&allvalidp,this,queryuc_ptr,querylength,query_lastpos,/*genestrand*/0,
- /*first_read_p*/true) == 0) {
- debug(printf("Aborting because no hits found anywhere\n"));
- *npaths = 0;
- Compress_free(&query_compress_fwd);
- Compress_free(&query_compress_rev);
- Stage1_free(&this,querylength);
- return (Stage3end_T *) NULL;
- } else {
+ /* Search 3: Subs/indels via complete set */
- gmap_history = History_new();
- hits = align_end(&cutoff_level,gmap_history,this,
- query_compress_fwd,query_compress_rev,
- queryuc_ptr,queryrc,querylength,query_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp,keep_floors_p,/*genestrand*/0,/*first_read_p*/true);
+ /* 4, 5. Complete set mismatches and indels, omitting frequent oligos */
+ if (found_score <= done_level) {
+ debug(printf("Test for completeset: false because found_score %d >done_level %d\n",found_score,done_level));
+ completesetp = false;
+ }
- if ((*npaths = List_length(hits)) == 0) {
- stage3array = (Stage3end_T *) NULL;
- } else {
- stage3array = (Stage3end_T *) List_to_array_out(hits,NULL); List_free(&hits); /* Return value */
- stage3array = Stage3end_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
- stage3array,maxpaths_search,queryseq,
- query_compress_fwd,query_compress_rev,
- genome,quality_string,/*displayp*/true);
- }
+ if (completesetp == true) {
+ if (this->read_oligos_p == false) {
+ read_oligos(&allvalidp,this,queryuc_ptr,querylength,query_lastpos,/*genestrand*/0,
+ /*first_read_p*/true);
+ }
- History_free(&gmap_history);
- Compress_free(&query_compress_fwd);
- Compress_free(&query_compress_rev);
- Stage1_free(&this,querylength);
- return stage3array;
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
+ /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+ floors_computed_p = true;
+ complete_set_mm_indels(&found_score,&segments_computed_p,
+ &plus_anchor_segments,&minus_anchor_segments,
+ &opt_level,&done_level,user_maxlevel,/*revise_levels_p*/true,
+ &nhits,&subs,&indels,this,query_compress_fwd,query_compress_rev,
+#if defined(DEBUG2) || defined(DEBUG2E)
+ queryuc_ptr,queryrc,
+#endif
+ querylength,query_lastpos,floors,indel_penalty_middle,indel_penalty_end,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ fast_level,genestrand,first_read_p);
+ if (found_score <= done_level) {
+ debug(printf("Test for completeset: false because found_score %d >done_level %d\n",found_score,done_level));
+ completesetp = false;
}
}
-}
-static Stage3end_T *
-single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_absmq,
- Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
- int indexdb_size_threshold, Genome_T genome, Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
- bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
- int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- bool keep_floors_p) {
- Stage3end_T *stage3array;
- History_T gmap_history;
- List_T hits, hits_geneplus = NULL, hits_geneminus = NULL;
- T this_geneplus = NULL, this_geneminus = NULL;
- int user_maxlevel;
- int querylength, query_lastpos, cutoff_level;
- char *queryuc_ptr, *quality_string;
- Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
- bool allvalidp;
+ /* Search 4: Segments -> single splicing */
-#ifdef HAVE_ALLOCA
- char *queryrc;
+ /* 6/7/8/9. Splicing. Requires compress and all positions fetched */
+ /* SPEED: For more hits, turn off first branch */
+ if (use_sarray_p == true && completesetp == false) {
+ /* Skip. Suffix array already found something. Also, get memory errors if run both algorithms. */
+
+ } else if (knownsplicingp || novelsplicingp || find_dna_chimeras_p) {
+ /* 6. Single splicing */
+ debug(printf("Deciding whether to do singlesplicing: done_level %d >=? localsplicing_penalty %d\n",
+ done_level,localsplicing_penalty));
+ if (done_level >= localsplicing_penalty) {
+ debug(printf("*** Stage 6. Single splicing masking frequent oligos with done_level %d ***\n",done_level));
+ /* Always mask frequent oligos for splicing, which must be transcriptional */
+ if (floors_computed_p == false) {
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
+ /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+ floors_computed_p = true;
+ }
+
+ if (segments_computed_p == false) {
+ this->plus_segments = identify_all_segments(&this->plus_nsegments,&plus_anchor_segments,
+ &this->plus_spliceable,&this->plus_nspliceable,
+#ifdef LARGE_GENOMES
+ this->plus_positions_high,this->plus_positions_low,
+#else
+ this->plus_positions,
+#endif
+ this->plus_npositions,this->omitted,querylength,query_lastpos,floors,
+ /*max_mismatches_allowed*/done_level,/*plusp*/true);
+ this->minus_segments = identify_all_segments(&this->minus_nsegments,&minus_anchor_segments,
+ &this->minus_spliceable,&this->minus_nspliceable,
+#ifdef LARGE_GENOMES
+ this->minus_positions_high,this->minus_positions_low,
#else
- char queryrc[MAX_READLENGTH+1];
+ this->minus_positions,
#endif
+ this->minus_npositions,this->omitted,querylength,query_lastpos,floors,
+ /*max_mismatches_allowed*/done_level,/*plusp*/false);
+ segments_computed_p = true;
+ }
+ singlesplicing = complete_set_singlesplicing(&found_score,singlesplicing,floors,this,
+ query_compress_fwd,query_compress_rev,
+ querylength,query_lastpos,
+ localsplicing_penalty,
+ /*max_mismatches_allowed*/done_level - localsplicing_penalty,
+ genestrand,first_read_p,
+ /*subs_or_indels_p*/(subs != NULL || indels != NULL) ? true : false);
- if ((querylength = Shortread_fulllength(queryseq)) < min_readlength) {
- fprintf(stderr,"Read %s has length %d < min_readlength %d. Skipping\n",
- Shortread_accession(queryseq),querylength,min_readlength);
- /* fprintf(stderr,"You may want to build a genomic index with a smaller k-mer value using the -k flag to gmap_build\n"); */
- *npaths = 0;
- return (Stage3end_T *) NULL;
-
-#ifndef HAVE_ALLOCA
- } else if (querylength > MAX_READLENGTH) {
- fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
- Shortread_accession(queryseq),querylength,MAX_READLENGTH);
- *npaths = 0;
- return (Stage3end_T *) NULL;
+#if 0
+ /* Mark ambiguous splices only for single-end reads */
+ singlesplicing = Stage3end_mark_ambiguous_splices(&ambiguousp,singlesplicing);
#endif
+ singlesplicing = Stage3end_optimal_score(singlesplicing,/*cutoff_level*/opt_level,subopt_levels,
+ query_compress_fwd,query_compress_rev,querylength,
+ /*keep_gmap_p*/true,/*finalp*/false);
- } else {
- if (user_maxlevel_float < 0.0) {
- user_maxlevel = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel = (int) rint(user_maxlevel_float * (double) querylength);
- } else {
- user_maxlevel = (int) user_maxlevel_float;
+ if (singlesplicing) {
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ }
}
- this_geneplus = Stage1_new(querylength);
- this_geneminus = Stage1_new(querylength);
- queryuc_ptr = Shortread_fullpointer_uc(queryseq);
- quality_string = Shortread_quality_string(queryseq);
- query_lastpos = querylength - index1part;
+ /* Search 5: Segments -> double splicing (currently disabled) */
+#ifdef PERFORM_DOUBLESPLICING
+ /* 7. Double splicing */
+ debug(printf("Deciding whether to do doublesplicing: done_level %d >=? localsplicing_penalty %d\n",
+ done_level,localsplicing_penalty));
+ if (done_level >= localsplicing_penalty) {
+ debug(printf("*** Stage 7. Double splicing masking frequent oligos with done_level %d ***\n",done_level));
+ if (floors_computed_p == false) {
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
+ /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+ floors_computed_p = true;
+ }
+ doublesplicing = complete_set_doublesplicing(&found_score,doublesplicing,floors,this,
+ query_compress_fwd,query_compress_rev,
+ queryuc_ptr,queryrc,querylength,query_lastpos,
+ localsplicing_penalty,min_shortend,
+ /*max_mismatches_allowed*/done_level - localsplicing_penalty,
+ /*pairedp*/false,genestrand,first_read_p,
+ /*subs_or_indels_p*/(subs != NULL || indels != NULL) ? true : false);
+
+#if 0
+ /* Mark ambiguous splices only for single-end reads */
+ doublesplicing = Stage3end_mark_ambiguous_splices(&ambiguousp,doublesplicing);
+#endif
+ doublesplicing = Stage3end_optimal_score(doublesplicing,/*cutoff_level*/opt_level,subopt_levels,
+ query_compress_fwd,query_compress_rev,querylength,
+ /*keep_gmap_p*/true,/*finalp*/false);
- /* Limit search on repetitive sequences */
- if (check_dinucleotides(queryuc_ptr,querylength) == false) {
- user_maxlevel = 0;
+ if (doublesplicing) {
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ }
}
+#endif
- query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
- query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
- gmap_history = History_new();
-#ifdef HAVE_ALLOCA
- queryrc = (char *) ALLOCA((querylength+1)*sizeof(char));
-#endif
- make_complement_buffered(queryrc,queryuc_ptr,querylength);
-
- if (read_oligos(&allvalidp,this_geneplus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+1,
- /*first_read_p*/true) > 0) {
- hits_geneplus = align_end(&cutoff_level,gmap_history,this_geneplus,
- query_compress_fwd,query_compress_rev,
- queryuc_ptr,queryrc,querylength,query_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp,keep_floors_p,/*genestrand*/+1,/*first_read_p*/true);
- }
-
- if (read_oligos(&allvalidp,this_geneminus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+2,
- /*first_read_p*/true) > 0) {
- hits_geneminus = align_end(&cutoff_level,gmap_history,this_geneminus,
- query_compress_fwd,query_compress_rev,
- queryuc_ptr,queryrc,querylength,query_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp,keep_floors_p,/*genestrand*/+2,/*first_read_p*/true);
- }
+ if (knownsplicingp == true && done_level >= localsplicing_penalty) {
+ /* Want >= and not > to give better results. Negligible effect on speed. */
+ /* 8. Shortend splicing */
- hits = List_append(hits_geneplus,hits_geneminus);
- hits = Stage3end_optimal_score(hits,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/true,/*finalp*/true);
- hits = Stage3end_reject_trimlengths(hits);
- hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
- hits = Stage3end_optimal_score(hits,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/false,/*finalp*/true);
- hits = Stage3end_resolve_multimapping(hits);
+ max_splice_mismatches = done_level - localsplicing_penalty;
+ debug(printf("*** Stage 8. Short-end splicing, allowing %d mismatches ***\n",max_splice_mismatches));
- if ((*npaths = List_length(hits)) == 0) {
- stage3array = (Stage3end_T *) NULL;
- } else {
- stage3array = (Stage3end_T *) List_to_array_out(hits,NULL); List_free(&hits); /* Return value */
- stage3array = Stage3end_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
- stage3array,maxpaths_search,queryseq,
- query_compress_fwd,query_compress_rev,
- genome,quality_string,/*displayp*/true);
- }
-
- History_free(&gmap_history);
- Compress_free(&query_compress_fwd);
- Compress_free(&query_compress_rev);
- Stage1_free(&this_geneminus,querylength);
- Stage1_free(&this_geneplus,querylength);
- return stage3array;
- }
-}
+ donors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ antidonors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ acceptors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ antiacceptors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ donors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ antidonors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ acceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ antiacceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ if (floors_computed_p == false) {
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
+ /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+ floors_computed_p = true;
+ }
-Stage3end_T *
-Stage1_single_read (int *npaths, int *first_absmq, int *second_absmq,
- Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
- int indexdb_size_threshold, Genome_T genome, Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
- bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
- int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- bool keep_floors_p) {
+ debug(printf("Starting find_spliceends (plus) with %d anchor segments\n",List_length(plus_anchor_segments)));
+ find_spliceends_shortend(&donors_plus,&antidonors_plus,&acceptors_plus,&antiacceptors_plus,
+ plus_anchor_segments,
+#ifdef DEBUG4E
+ /*queryptr*/queryuc_ptr,
+#endif
+ floors,querylength,query_lastpos,/*query_compress*/query_compress_fwd,
+ max_splice_mismatches,/*plusp*/true,genestrand,first_read_p);
+ debug(printf("Finished find_spliceends (plus)\n"));
+
+ debug(printf("Starting find_spliceends (minus) with %d anchor segments\n",List_length(minus_anchor_segments)));
+ find_spliceends_shortend(&antidonors_minus,&donors_minus,&antiacceptors_minus,&acceptors_minus,
+ minus_anchor_segments,
+#ifdef DEBUG4E
+ /*queryptr*/queryrc,
+#endif
+ floors,querylength,query_lastpos,/*query_compress*/query_compress_rev,
+ max_splice_mismatches,/*plusp*/false,genestrand,first_read_p);
+ debug(printf("Finished find_spliceends (minus)\n"));
+
+ shortendsplicing = find_splicepairs_shortend(&found_score,shortendsplicing,
+ donors_plus,antidonors_plus,
+ acceptors_plus,antiacceptors_plus,
+ donors_minus,antidonors_minus,
+ acceptors_minus,antiacceptors_minus,
+ query_compress_fwd,query_compress_rev,
+ queryuc_ptr,queryrc,min_shortend,
+ localsplicing_penalty,
+ /*max_mismatches_allowed*/max_splice_mismatches,querylength,
+ /*pairedp*/false,genestrand,first_read_p);
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ debug(printf("8> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+
+ for (i = 0; i <= max_splice_mismatches; i++) {
+ substringlist_gc(&(donors_plus[i]));
+ substringlist_gc(&(antidonors_plus[i]));
+ substringlist_gc(&(acceptors_plus[i]));
+ substringlist_gc(&(antiacceptors_plus[i]));
+ substringlist_gc(&(donors_minus[i]));
+ substringlist_gc(&(antidonors_minus[i]));
+ substringlist_gc(&(acceptors_minus[i]));
+ substringlist_gc(&(antiacceptors_minus[i]));
+ }
+ FREEA(donors_plus);
+ FREEA(antidonors_plus);
+ FREEA(acceptors_plus);
+ FREEA(antiacceptors_plus);
+ FREEA(donors_minus);
+ FREEA(antidonors_minus);
+ FREEA(acceptors_minus);
+ FREEA(antiacceptors_minus);
+ }
- if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED) {
- return single_read(&(*npaths),&(*first_absmq),&(*second_absmq),
- queryseq,indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- genome,floors_array,user_maxlevel_float,
- indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p);
- } else if (mode == CMET_NONSTRANDED || mode == ATOI_NONSTRANDED) {
- return single_read_tolerant_nonstranded(&(*npaths),&(*first_absmq),&(*second_absmq),queryseq,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- genome,floors_array,user_maxlevel_float,
- indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p);
- } else {
- fprintf(stderr,"Do not recognize mode %d\n",mode);
- abort();
- }
-}
+ /* Search 7: Distant splicing */
+ if (done_level < distantsplicing_penalty) {
+ /* Want < and not <=, because otherwise distant splicing does not work on 50-bp reads */
+ /* Want <= and not <, because distant splicing needs to be better than other alternatives */
+ /* Don't find distant splicing */
+ debug(printf("Skipping distant splicing because done_level %d < distantsplicing_penalty %d\n",
+ done_level,distantsplicing_penalty));
+ } else if (find_dna_chimeras_p == true) {
+ /* 9 (DNA). Find distant splicing for DNA */
+ max_splice_mismatches = done_level - distantsplicing_penalty;
+ debug(printf("*** Stage 9 (DNA). Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches));
+ startfrags_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ endfrags_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ startfrags_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ endfrags_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
-/* #define HITARRAY_SHORTENDSPLICING 4 */
-/* #define HITARRAY_DISTANTSPLICING 4 */
+ if (floors_computed_p == false) {
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
+ /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+ floors_computed_p = true;
+ }
+ debug(printf("Starting find_spliceends_distant_dna_plus\n"));
+ find_spliceends_distant_dna_plus(&startfrags_plus,&endfrags_plus,plus_anchor_segments,
+#ifdef DEBUG4E
+ /*queryptr*/queryuc_ptr,
+#endif
+ floors,querylength,query_lastpos,/*query_compress*/query_compress_fwd,
+ max_splice_mismatches,genestrand,first_read_p);
+ debug(printf("Finished find_spliceends_distant_dna_plus\n"));
-static List_T
-align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3,
- Shortread_T queryseq5, Shortread_T queryseq3,
- char *queryuc_ptr, int querylength, int query_lastpos,
-#ifdef END_KNOWNSPLICING_SHORTCUT
- char *queryrc, bool invertedp,
+ debug(printf("Starting find_spliceends_distant_dna_minus\n"));
+ find_spliceends_distant_dna_minus(&startfrags_minus,&endfrags_minus,minus_anchor_segments,
+#ifdef DEBUG4E
+ /*queryptr*/queryrc,
#endif
- Compress_T query_compress_fwd, Compress_T query_compress_rev,
- struct Segment_T *plus_segments, int plus_nsegments,
- struct Segment_T *minus_segments, int minus_nsegments,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int pairmax, Chrpos_T shortsplicedist, int user_maxlevel,
- int genestrand, bool first_read_p) {
- List_T hits = NULL;
- int sensedir, sense_try;
- int overlap;
-
- int zero_offset = 0;
- Univcoord_T segmentstart, segmentend;
- Univcoord_T genomicbound, genomicbound2, mappingstart, mappingend,
- chroffset, chrhigh, mappingpos;
- Univcoord_T close_mappingstart_greedy, close_mappingend_greedy, close_mappingstart_last, close_mappingend_last;
- Univcoord_T middle_mappingstart_greedy, middle_mappingend_greedy, middle_mappingstart_last, middle_mappingend_last;
- Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
- Univcoord_T close_knownsplice_limit_low, close_knownsplice_limit_high;
- Chrpos_T chrlength;
- Chrnum_T chrnum;
- bool close_mappingstart_p = false, close_mappingend_p = false;
- bool middle_mappingstart_p = false, middle_mappingend_p = false;
- bool fallback_mappingstart_p, fallback_mappingend_p;
- bool good_start_p, good_end_p, watsonp, favor_right_p;
+ floors,querylength,query_lastpos,/*query_compress*/query_compress_rev,
+ max_splice_mismatches,genestrand,first_read_p);
+ debug(printf("Finished find_spliceends_distant_dna_minus\n"));
- int starti, endi, i;
+ nmismatches = 0;
+ ambiguousp = false;
+ while (longsinglesplicing == NULL &&
+ nmismatches <= done_level - distantsplicing_penalty &&
+ nsplicepairs < MAXCHIMERAPATHS && ambiguousp == false) {
+ debug(printf("*** Stage 9 (DNA). Distant splicing, allowing %d mismatches ***\n",nmismatches));
+ debug4e(printf("Sorting splice ends\n"));
+ startfrags_plus[nmismatches] = Substring_sort_chimera_halves(startfrags_plus[nmismatches],/*ascendingp*/true);
+ endfrags_plus[nmismatches] = Substring_sort_chimera_halves(endfrags_plus[nmismatches],/*ascendingp*/true);
- if (hit3 == NULL) {
- /* Both events are tested by Stage3end_anomalous_splice_p */
- if ((chrnum = Stage3end_chrnum(hit5)) == 0) {
- /* Translocation */
- return (List_T) NULL;
+ startfrags_minus[nmismatches] = Substring_sort_chimera_halves(startfrags_minus[nmismatches],/*ascendingp*/false);
+ endfrags_minus[nmismatches] = Substring_sort_chimera_halves(endfrags_minus[nmismatches],/*ascendingp*/false);
- } else if (Stage3end_hittype(hit5) == SAMECHR_SPLICE) {
- /* A genomic event that doesn't get reflected in chrnum */
- return (List_T) NULL;
+ debug4e(printf("Splice ends at %d nmismatches: +startfrags/endfrags %d/%d, -startfrags/endfrags %d/%d\n",
+ nmismatches,
+ List_length(startfrags_plus[nmismatches]),List_length(endfrags_plus[nmismatches]),
+ List_length(startfrags_minus[nmismatches]),List_length(endfrags_minus[nmismatches])));
- } else if ((watsonp = Stage3end_plusp(hit5)) == true) {
- chroffset = Stage3end_chroffset(hit5);
- chrhigh = Stage3end_chrhigh(hit5);
- chrlength = Stage3end_chrlength(hit5);
+ distantsplicing = find_splicepairs_distant_dna(&found_score,&nsplicepairs,&longsinglesplicing,distantsplicing,
+ startfrags_plus,endfrags_plus,startfrags_minus,endfrags_minus,
+ localsplicing_penalty,distantsplicing_penalty,
+ querylength,nmismatches,first_read_p);
+#if 0
+ assert(List_length(distantsplicing) <= 1);
+#endif
- if (Shortread_find_primers(queryseq5,queryseq3) == true) {
- /* Go from genomicstart */
- debug13(printf("Found primers\n"));
- genomicbound = subtract_bounded(Stage3end_genomicstart(hit5),querylength,chroffset);
+#if 0
+ /* Mark ambiguous splices only for single-end reads */
+ distantsplicing = Stage3end_mark_ambiguous_splices(&ambiguousp,distantsplicing);
+#endif
- } else if (Stage3end_anomalous_splice_p(hit5) == true) {
- /* Go from genomicstart */
- debug13(printf("Anomalous splice\n"));
- genomicbound = subtract_bounded(Stage3end_genomicstart(hit5),querylength,chroffset);
+ /* Excess distant splicing should be freed already in find_splicepairs_distant_rna */
+ debug(printf("Entering Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
+ distantsplicing = Stage3end_optimal_score(distantsplicing,opt_level,subopt_levels,
+ query_compress_fwd,query_compress_rev,querylength,
+ /*keep_gmap_p*/true,/*finalp*/false);
+ debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
- } else {
- genomicbound = subtract_bounded(Stage3end_genomicend(hit5),querylength,chroffset);
-
- /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
- if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
- Stage3end_genomicbound_from_end(&genomicbound2,hit5,overlap,chroffset) == true) {
- debug13(printf("Found overlap of %d\n",overlap));
- if (genomicbound2 < genomicbound) {
- zero_offset = genomicbound - genomicbound2;
- genomicbound = genomicbound2;
+ if (distantsplicing) {
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
}
+ debug(printf("9 (DNA)> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
}
+ nmismatches++;
+
}
- debug13(printf("Case 1: hit5 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
- Stage3end_hittype_string(hit5),
- Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
- Stage3end_sensedir(hit5),genomicbound - chroffset));
+ if (longsinglesplicing != NULL) {
+ debug(printf("Entering Stage3end_optimal_score with %d longsinglesplicing hits\n",List_length(longsinglesplicing)));
+ longsinglesplicing = Stage3end_optimal_score(longsinglesplicing,opt_level,subopt_levels,
+ query_compress_fwd,query_compress_rev,querylength,
+ /*keep_gmap_p*/true,/*finalp*/false);
+ debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(longsinglesplicing)));
- knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
- knownsplice_limit_high = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
- segmentend = add_bounded(Stage3end_genomicend(hit5),pairmax,chrhigh);
-#ifdef LONG_ENDSPLICES
- mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
-#else
- mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chrhigh);
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ debug(printf("9 (DNA)> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+ }
+
+ for (i = 0; i <= max_splice_mismatches; i++) {
+ substringlist_gc(&(startfrags_plus[i]));
+ substringlist_gc(&(endfrags_plus[i]));
+ substringlist_gc(&(startfrags_minus[i]));
+ substringlist_gc(&(endfrags_minus[i]));
+ }
+ FREEA(startfrags_plus);
+ FREEA(endfrags_plus);
+ FREEA(startfrags_minus);
+ FREEA(endfrags_minus);
+
+ } else if (knownsplicingp || novelsplicingp) {
+ /* 9 (RNA). Find distant splicing for RNA iteratively using both known and novel splice sites */
+ max_splice_mismatches = done_level - distantsplicing_penalty;
+ debug(printf("*** Stage 9 (RNA). Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches));
+
+ donors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ antidonors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ acceptors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ antiacceptors_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ donors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ antidonors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ acceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+ antiacceptors_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+
+ if (floors_computed_p == false) {
+ floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,querylength,query_lastpos,
+ plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
+ /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+ floors_computed_p = true;
+ }
+
+ debug(printf("Starting find_spliceends_distant_rna (plus)\n"));
+ find_spliceends_distant_rna(&donors_plus,&antidonors_plus,&acceptors_plus,&antiacceptors_plus,
+ plus_anchor_segments,
+#ifdef DEBUG4E
+ /*queryptr*/queryuc_ptr,
#endif
- debug13(printf("Original bounds E: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingend %u\n",
- knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingend - chroffset));
+ floors,querylength,query_lastpos,/*query_compress*/query_compress_fwd,
+ max_splice_mismatches,/*plusp*/true,genestrand,first_read_p);
+ debug(printf("Finished find_spliceends_distant_rna (plus)\n"));
- close_mappingend_last = middle_mappingend_last = Stage3end_genomicend(hit5);
- close_mappingend_greedy = middle_mappingend_greedy = segmentend;
- if (plus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u (plus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,plus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,plus_nsegments-1,plus_segments,segmentstart);
- while (i < plus_nsegments - 1 && plus_segments[i].diagonal == (Univcoord_T) -1) {
- i++;
- }
- starti = i;
- while (plus_segments[i].diagonal < segmentend) {
- endi = i;
- i++;
- }
- if (starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti <= endi);
- for (i = starti; i <= endi; i++) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
- plus_segments[i].querypos5,plus_segments[i].querypos3));
- if (query_lastpos - plus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 1. Missing end of query, so there could be a middle splice */
- debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
- mappingpos > genomicbound) {
- middle_mappingend_greedy = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
- }
-#ifdef LONG_ENDSPLICES
- if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
- }
-#else
- if (mappingpos > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
- }
+ debug(printf("Starting find_spliceends_distant_rna (minus)\n"));
+ find_spliceends_distant_rna(&antidonors_minus,&donors_minus,&antiacceptors_minus,&acceptors_minus,
+ minus_anchor_segments,
+#ifdef DEBUG4E
+ /*queryptr*/queryrc,
#endif
+ floors,querylength,query_lastpos,/*query_compress*/query_compress_rev,
+ max_splice_mismatches,/*plusp*/false,genestrand,first_read_p);
+ debug(printf("Finished find_spliceends_distant_rna (minus)\n"));
- } else {
- debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
- query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = plus_segments[i].diagonal) < close_mappingend_greedy &&
- mappingpos > genomicbound) {
- close_mappingend_greedy = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
- }
- if (mappingpos > close_mappingend_last) {
- /* Use > for NOT_GREEDY */
- close_mappingend_last = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
- }
- }
- }
- if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
- } else if (middle_mappingend_p == true) {
- debug13(printf("Using middle mappingend\n"));
- close_knownsplice_limit_high = middle_mappingend_greedy;
- close_mappingend_greedy = middle_mappingend_greedy;
- close_mappingend_p = true;
- }
- if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
- knownsplice_limit_high = middle_mappingend_last;
- mappingend = middle_mappingend_last;
- } else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
- knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
- mappingend = close_mappingend_last;
- }
- if (close_mappingend_p == false) {
- fallback_mappingend_p = false;
- } else if (mappingend <= close_mappingend_greedy) {
- fallback_mappingend_p = false;
- } else {
- debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
- fallback_mappingend_p = true;
- }
- }
- }
+ nmismatches = 0;
+ ambiguousp = false;
+ while (longsinglesplicing == NULL &&
+ nmismatches <= done_level - distantsplicing_penalty &&
+ nsplicepairs < MAXCHIMERAPATHS && ambiguousp == false) {
+ debug(printf("*** Stage 9 (RNA). Distant splicing, allowing %d mismatches ***\n",nmismatches));
- favor_right_p = false;
+ debug4e(printf("Sorting splice ends\n"));
+ donors_plus[nmismatches] = Substring_sort_chimera_halves(donors_plus[nmismatches],/*ascendingp*/true);
+ acceptors_plus[nmismatches] = Substring_sort_chimera_halves(acceptors_plus[nmismatches],/*ascendingp*/true);
- } else {
- chroffset = Stage3end_chroffset(hit5);
- chrhigh = Stage3end_chrhigh(hit5);
- chrlength = Stage3end_chrlength(hit5);
+ antidonors_plus[nmismatches] = Substring_sort_chimera_halves(antidonors_plus[nmismatches],/*ascendingp*/false);
+ antiacceptors_plus[nmismatches] = Substring_sort_chimera_halves(antiacceptors_plus[nmismatches],/*ascendingp*/false);
- if (Shortread_find_primers(queryseq5,queryseq3) == true) {
- /* Go from genomicstart */
- debug13(printf("Found primers\n"));
- genomicbound = add_bounded(Stage3end_genomicstart(hit5),querylength,chrhigh);
+ donors_minus[nmismatches] = Substring_sort_chimera_halves(donors_minus[nmismatches],/*ascendingp*/false);
+ acceptors_minus[nmismatches] = Substring_sort_chimera_halves(acceptors_minus[nmismatches],/*ascendingp*/false);
- } else if (Stage3end_anomalous_splice_p(hit5) == true) {
- /* Go from genomicstart */
- debug13(printf("Anomalous splice\n"));
- genomicbound = add_bounded(Stage3end_genomicstart(hit5),querylength,chrhigh);
+ antidonors_minus[nmismatches] = Substring_sort_chimera_halves(antidonors_minus[nmismatches],/*ascendingp*/true);
+ antiacceptors_minus[nmismatches] = Substring_sort_chimera_halves(antiacceptors_minus[nmismatches],/*ascendingp*/true);
- } else {
- genomicbound = add_bounded(Stage3end_genomicend(hit5),querylength,chrhigh);
-
- /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
- if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
- Stage3end_genomicbound_from_end(&genomicbound2,hit5,overlap,chroffset) == true) {
- debug13(printf("Found overlap of %d\n",overlap));
- if (genomicbound2 > genomicbound) {
- zero_offset = genomicbound2 - genomicbound;
- genomicbound = genomicbound2;
- }
- }
- }
+ debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n",
+ nmismatches,
+ List_length(donors_plus[nmismatches]),List_length(acceptors_plus[nmismatches]),
+ List_length(antidonors_plus[nmismatches]),List_length(antiacceptors_plus[nmismatches]),
+ List_length(donors_minus[nmismatches]),List_length(acceptors_minus[nmismatches]),
+ List_length(antidonors_minus[nmismatches]),List_length(antiacceptors_minus[nmismatches])));
- debug13(printf("Case 2: hit5 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
- Stage3end_hittype_string(hit5),
- Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
- Stage3end_sensedir(hit5),genomicbound - chroffset));
+ distantsplicing = find_splicepairs_distant_rna(&found_score,&nsplicepairs,&longsinglesplicing,distantsplicing,
+ donors_plus,antidonors_plus,acceptors_plus,antiacceptors_plus,
+ donors_minus,antidonors_minus,acceptors_minus,antiacceptors_minus,
+ localsplicing_penalty,distantsplicing_penalty,
+ querylength,nmismatches,first_read_p);
+#if 0
+ assert(List_length(distantsplicing) <= 1);
+#endif
- knownsplice_limit_high = mappingend = segmentend = genomicbound;
- knownsplice_limit_low = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
- segmentstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax,chroffset);
-#ifdef LONG_ENDSPLICES
- mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
-#else
- mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chroffset);
-#endif
- debug13(printf("Original bounds F: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingstart %u\n",
- knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingstart - chroffset));
-
- close_mappingstart_last = middle_mappingstart_last = Stage3end_genomicend(hit5);
- close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
-
- if (minus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u (minus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,minus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,minus_nsegments-1,minus_segments,segmentend);
- while (i >= 0 && minus_segments[i].diagonal >= segmentend) {
- i--;
- }
- starti = i;
- while (i >= 0 && minus_segments[i].diagonal > segmentstart) {
- if (minus_segments[i].diagonal < (Univcoord_T) -1) {
- endi = i;
- }
- i--;
- }
- if (starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti >= endi);
- for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
- minus_segments[i].querypos5,minus_segments[i].querypos3));
- if (query_lastpos - minus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 2. Missing end of query, so there could be a middle splice */
- debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
- mappingpos < genomicbound) {
- middle_mappingstart_greedy = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
- }
-#ifdef LONG_ENDSPLICES
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
-#else
- if (mappingpos < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
+#if 0
+ /* Mark ambiguous splices only for single-end reads */
+ distantsplicing = Stage3end_mark_ambiguous_splices(&ambiguousp,distantsplicing);
#endif
- } else {
- debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
- query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
- mappingpos < genomicbound) {
- close_mappingstart_greedy = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart greedy to %u\n",close_mappingstart_greedy - chroffset));
- }
- if (mappingpos > close_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- close_mappingstart_last = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
- }
- }
- }
- if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
- } else if (middle_mappingstart_p == true) {
- debug13(printf("Using middle mappingstart\n"));
- close_knownsplice_limit_low = middle_mappingstart_greedy;
- close_mappingstart_greedy = middle_mappingstart_greedy;
- close_mappingstart_p = true;
- }
- if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
- knownsplice_limit_low = middle_mappingstart_last;
- mappingstart = middle_mappingstart_last;
- } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
- knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
- mappingstart = close_mappingstart_last;
- }
- if (close_mappingstart_p == false) {
- fallback_mappingstart_p = false;
- } else if (mappingstart >= close_mappingstart_greedy) {
- fallback_mappingstart_p = false;
- } else {
- debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
- fallback_mappingstart_p = true;
+ /* Excess distant splicing should be freed already in find_splicepairs_distant_rna */
+ debug(printf("Entering Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
+ distantsplicing = Stage3end_optimal_score(distantsplicing,opt_level,subopt_levels,
+ query_compress_fwd,query_compress_rev,querylength,
+ /*keep_gmap_p*/true,/*finalp*/false);
+ debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
+
+ if (distantsplicing) {
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
}
+ debug(printf("9 (RNA)> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
}
- }
-
- favor_right_p = false;
- }
-
- if ((sensedir = Stage3end_sensedir_nonamb(hit5)) == SENSE_FORWARD) {
- sense_try = +1;
- } else if (sensedir == SENSE_ANTI) {
- sense_try = -1;
- } else {
- sense_try = 0;
- }
+ nmismatches++;
- } else if (hit5 == NULL) {
- /* Both events are tested by Stage3end_anomalous_splice_p */
- if ((chrnum = Stage3end_chrnum(hit3)) == 0) {
- /* Translocation */
- return (List_T) NULL;
+ }
- } else if (Stage3end_hittype(hit3) == SAMECHR_SPLICE) {
- /* A genomic event that doesn't get reflected in chrnum */
- return (List_T) NULL;
+ if (longsinglesplicing != NULL) {
+ debug(printf("Entering Stage3end_optimal_score with %d longsinglesplicing hits\n",List_length(longsinglesplicing)));
+ longsinglesplicing = Stage3end_optimal_score(longsinglesplicing,opt_level,subopt_levels,
+ query_compress_fwd,query_compress_rev,querylength,
+ /*keep_gmap_p*/true,/*finalp*/false);
+ debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(longsinglesplicing)));
- } else if ((watsonp = Stage3end_plusp(hit3)) == true) {
- chroffset = Stage3end_chroffset(hit3);
- chrhigh = Stage3end_chrhigh(hit3);
- chrlength = Stage3end_chrlength(hit3);
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ debug(printf("9 (RNA)> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+ }
- if (Shortread_find_primers(queryseq5,queryseq3) == true) {
- /* Go from genomicend */
- debug13(printf("Found primers\n"));
- genomicbound = add_bounded(Stage3end_genomicend(hit3),querylength,chrhigh);
+ for (i = 0; i <= max_splice_mismatches; i++) {
+ substringlist_gc(&(donors_plus[i]));
+ substringlist_gc(&(antidonors_plus[i]));
+ substringlist_gc(&(acceptors_plus[i]));
+ substringlist_gc(&(antiacceptors_plus[i]));
+ substringlist_gc(&(donors_minus[i]));
+ substringlist_gc(&(antidonors_minus[i]));
+ substringlist_gc(&(acceptors_minus[i]));
+ substringlist_gc(&(antiacceptors_minus[i]));
+ }
+ FREEA(donors_plus);
+ FREEA(antidonors_plus);
+ FREEA(acceptors_plus);
+ FREEA(antiacceptors_plus);
+ FREEA(donors_minus);
+ FREEA(antidonors_minus);
+ FREEA(acceptors_minus);
+ FREEA(antiacceptors_minus);
+ }
- } else if (Stage3end_anomalous_splice_p(hit3) == true) {
- /* Go from genomicend */
- debug13(printf("Anomalous splice\n"));
- genomicbound = add_bounded(Stage3end_genomicend(hit3),querylength,chrhigh);
+ debug(printf("%d single splices, %d double splices, %d short-end splices, %d long single splices, %d distant splices\n",
+ List_length(singlesplicing),List_length(doublesplicing),
+ List_length(shortendsplicing),List_length(longsinglesplicing),
+ List_length(distantsplicing)));
+ }
- } else {
- genomicbound = add_bounded(Stage3end_genomicstart(hit3),querylength,chrhigh);
-
- /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
- if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
- Stage3end_genomicbound_from_start(&genomicbound2,hit3,overlap,chroffset) == true) {
- debug13(printf("Found overlap of %d\n",overlap));
- if (genomicbound2 > genomicbound) {
- zero_offset = genomicbound2 - genomicbound;
- genomicbound = genomicbound2;
- }
- }
- }
- debug13(printf("Case 3: hit3 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
- Stage3end_hittype_string(hit3),
- Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
- Stage3end_sensedir(hit3),genomicbound - chroffset));
+ /* Search 8: Terminals */
- knownsplice_limit_high = mappingend = segmentend = genomicbound;
- knownsplice_limit_low = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
- segmentstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax,chroffset);
-#ifdef LONG_ENDSPLICES
- mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
-#else
- mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chroffset);
-#endif
+ if (greedy || subs || indels || singlesplicing || doublesplicing || shortendsplicing || longsinglesplicing || distantsplicing) {
+ /* Don't find terminals */
+ debug(printf("Skipping terminals because have greedy %p, subs %p, indels %p, singlesplicing %p, doublesplicing %p, or shortendsplicing %p\n",
+ greedy,subs,indels,singlesplicing,doublesplicing,shortendsplicing));
+ } else {
+ terminals = find_terminals(plus_anchor_segments,minus_anchor_segments,
+ querylength,query_lastpos,
+ query_compress_fwd,query_compress_rev,
+ /*max_mismatches_allowed*/done_level,genestrand,first_read_p);
+ }
- close_mappingstart_last = middle_mappingstart_last = Stage3end_genomicstart(hit3);
- close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
+ debug(printf("Before GMAP:\n"));
+ debug(printf(" greedy: %d\n",List_length(greedy)));
+ debug(printf(" subs: %d\n",List_length(subs)));
+ debug(printf(" indels: %d\n",List_length(indels)));
+ debug(printf(" singlesplicing %d\n",List_length(singlesplicing)));
+ debug(printf(" doublesplicing %d\n",List_length(doublesplicing)));
+ debug(printf(" shortendsplicing: %d\n",List_length(shortendsplicing)));
+ debug(printf(" longsinglesplicing %d\n",List_length(longsinglesplicing)));
+ debug(printf(" distantsplicing: %d\n",List_length(distantsplicing)));
+ debug(printf(" terminals: %d\n",List_length(terminals)));
+ debug(printf(" done_level: %d\n",done_level));
- if (plus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u (plus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,plus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,plus_nsegments-1,plus_segments,segmentend);
- while (i >= 0 && plus_segments[i].diagonal >= segmentend) {
- i--;
- }
- starti = i;
- while (i >= 0 && plus_segments[i].diagonal > segmentstart) {
- if (plus_segments[i].diagonal < (Univcoord_T) -1) {
- endi = i;
- }
- i--;
- }
- if (starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti >= endi);
- for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
- plus_segments[i].querypos5,plus_segments[i].querypos3));
- if (plus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 3. Missing start of query, so there could be a middle splice */
- debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
- mappingpos < genomicbound) {
- middle_mappingstart_greedy = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
- }
-#ifdef LONG_ENDSPLICES
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
-#else
- if (mappingpos < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
+ hits = List_append(greedy,
+ List_append(subs,
+ List_append(terminals,
+ List_append(indels,
+ List_append(singlesplicing,
+ List_append(longsinglesplicing,
+ List_append(doublesplicing,
+ List_append(shortendsplicing,distantsplicing))))))));
+ /* Search 9: GMAP via segments */
+ gmapp = true;
+ if (gmap_segments_p == false) {
+ gmapp = false;
+ } else if (found_score < trigger_score_for_gmap) {
+ debug(printf("Test for stage 9: true because found_score %d >= trigger_score_for_gmap %d\n",found_score,trigger_score_for_gmap));
+ gmapp = false;
+ }
+
+ gmap_hits = (List_T) NULL;
+ if (gmapp == true) {
+ gmap_hits = convert_plus_segments_to_gmap(gmap_history,/*hits*/NULL,
+ accession,queryuc_ptr,querylength,query_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ queryrc,Shortread_invertedp(queryseq),
+#endif
+ query_compress_fwd,query_compress_rev,
+
+ plus_anchor_segments,this->plus_segments,this->plus_nsegments,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ user_maxlevel,genestrand,first_read_p,/*require_pairing_p*/false);
+ gmap_hits = convert_minus_segments_to_gmap(gmap_history,/*hits*/gmap_hits,
+ accession,queryuc_ptr,querylength,query_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ queryrc,Shortread_invertedp(queryseq),
#endif
+ query_compress_fwd,query_compress_rev,
+ minus_anchor_segments,this->minus_segments,this->minus_nsegments,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ user_maxlevel,genestrand,first_read_p,/*require_pairing_p*/false);
- } else {
- debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
- plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
- mappingpos < genomicbound) {
- close_mappingstart_greedy = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart greedy to %u\n",close_mappingstart_greedy - chroffset));
- }
- if (mappingpos < close_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- close_mappingstart_last = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
- }
- }
- }
+ opt_level = (found_score < opt_level) ? found_score : opt_level;
+ if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+ done_level = user_maxlevel;
+ }
+ debug(printf("10> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+ }
- if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
- } else if (middle_mappingstart_p == true) {
- debug13(printf("Using middle mappingstart\n"));
- close_knownsplice_limit_low = middle_mappingstart_greedy;
- close_mappingstart_greedy = middle_mappingstart_greedy;
- close_mappingstart_p = true;
- }
- if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
- knownsplice_limit_low = middle_mappingstart_last;
- mappingstart = middle_mappingstart_last;
- } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
- knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
- mappingstart = close_mappingstart_last;
- }
- if (close_mappingstart_p == false) {
- fallback_mappingstart_p = false;
- } else if (mappingstart >= close_mappingstart_greedy) {
- fallback_mappingstart_p = false;
- } else {
- debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
- fallback_mappingstart_p = true;
- }
- }
- }
- favor_right_p = true;
+ /* Search 10: GMAP improvement */
+ debug13(printf("%d hits (vs max_gmap_improvement %d)\n",List_length(gmap_hits),max_gmap_improvement));
+ if (hits != NULL && gmap_improvement_p == true) {
+ /* 11. GMAP terminal */
+
+ /* This is done for paired-ends, but should not be necessary for single-end */
+ debug13(printf("Before remove overlaps at cutoff level %d: %d hits\n",opt_level,List_length(hits)));
+ hits = Stage3end_sort_bymatches(Stage3end_remove_overlaps(hits,/*finalp*/false));
+ debug13(printf("After remove overlaps: %d\n",List_length(hits)));
- } else {
- chroffset = Stage3end_chroffset(hit3);
- chrhigh = Stage3end_chrhigh(hit3);
- chrlength = Stage3end_chrlength(hit3);
+ i = 0;
+ debug13(printf("%d hits\n",List_length(hits)));
+ debug13(printf("For each hit, running GMAP on single end to match with hit\n"));
- if (Shortread_find_primers(queryseq5,queryseq3) == true) {
- /* Go from genomicend */
- debug13(printf("Found primers\n"));
- genomicbound = subtract_bounded(Stage3end_genomicend(hit3),querylength,chroffset);
+ for (p = hits; p != NULL && i < max_gmap_improvement; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ if ((gmap = align_single_hit_with_gmap(hit,queryuc_ptr,querylength,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ queryrc,Shortread_invertedp(queryseq),
+#endif
+ oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ genestrand,first_read_p)) != NULL) {
+ debug13(missing_hit = querylength - Stage3end_nmatches_posttrim(hit));
+ debug13(missing_gmap = querylength - Stage3end_nmatches_posttrim(gmap));
+ debug13(printf("GMAP %p with %d matches, %d missing compared with original terminal with %d matches, %d missing\n",
+ gmap,Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
+ gmap_hits = List_push(gmap_hits,(void *) gmap);
+ Stage3end_set_improved_by_gmap(hit);
+ }
+ }
+ }
+ debug13(printf("Have %d GMAP hits\n",List_length(gmap_hits)));
+
+ if (alloc_floors_p == true) {
+ Floors_free(&floors);
+ }
+
+ /* Keep gmap_hits found in search 9 and 10 */
+ if (gmap_hits != NULL) {
+ hits = List_append(hits,gmap_hits);
+ }
+
+ if (gmap_improvement_p == false) {
+ debug(printf("No GMAP improvement: Before remove_overlaps at cutoff level %d: %d\n",*cutoff_level,List_length(hits)));
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/true,/*finalp*/true);
+ hits = Stage3end_reject_trimlengths(hits);
+ hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/false,/*finalp*/true);
+ hits = Stage3end_resolve_multimapping(hits);
+ debug(printf("After remove_overlaps: %d\n",List_length(hits)));
+
+ } else {
+ debug(printf("GMAP improvement: Before remove_overlaps at cutoff level %d: %d\n",*cutoff_level,List_length(hits)));
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/true,/*finalp*/false);
+ /* Don't reject based on trimlength until after GMAP improvements */
+ hits = Stage3end_remove_overlaps(hits,/*finalp*/false);
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/false,/*finalp*/false);
+ hits = Stage3end_resolve_multimapping(hits);
+ debug(printf("After remove_overlaps: %d\n",List_length(hits)));
+
+ hits = align_singleend_with_gmap(gmap_history,hits,this,query_compress_fwd,query_compress_rev,
+ accession,queryuc_ptr,querylength,query_lastpos,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel,*cutoff_level,
+ first_read_p);
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/true,/*finalp*/true);
+ hits = Stage3end_reject_trimlengths(hits);
+ hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/false,/*finalp*/true);
+ hits = Stage3end_resolve_multimapping(hits);
+ }
+
+ hits = Stage3end_remove_circular_alias(hits);
+ hits = Stage3end_remove_duplicates(hits); /* Aliases can cause duplicates */
+
+ List_free(&plus_anchor_segments);
+ List_free(&minus_anchor_segments);
+
+ return hits;
+ }
+
+
+ /* Aligns one single-end read using a single genestrand (genestrand 0).
+
+    npaths:        set to the number of alignments returned (0 on any
+                   early exit or when nothing aligns).
+    first_absmq,
+    second_absmq:  filled in by Stage3end_eval_and_sort; not written when
+                   no alignment is found.
+    user_maxlevel_float:
+                   negative => user_maxlevel of -1; strictly between 0 and
+                   1 => treated as a fraction of the read length; anything
+                   else is truncated to int.
+    All remaining parameters are forwarded unchanged to align_end.
+
+    Returns the array produced by List_to_array_out and then
+    Stage3end_eval_and_sort, or NULL when *npaths is 0.  Reads shorter than
+    min_readlength (and, without alloca, longer than MAX_READLENGTH) are
+    reported on stderr and skipped. */
+ static Stage3end_T *
+ single_read (int *npaths, int *first_absmq, int *second_absmq,
+              Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
+              int indexdb_size_threshold, Floors_T *floors_array,
+              double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+              bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
+              int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
+              Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+              Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+              Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+              bool keep_floors_p) {
+   Stage3end_T *stage3array;
+   History_T gmap_history;
+   List_T hits = NULL;
+   T this = NULL;
+   int user_maxlevel;
+   int querylength, query_lastpos, cutoff_level;
+   char *queryuc_ptr, *quality_string;
+   Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
+   bool allvalidp;
+
+ #ifdef HAVE_ALLOCA
+   char *queryrc;
+ #else
+   char queryrc[MAX_READLENGTH+1];
+ #endif
+
+   if ((querylength = Shortread_fulllength(queryseq)) < min_readlength) {
+     fprintf(stderr,"Read %s has length %d < min_readlength %d. Skipping.\n",
+             Shortread_accession(queryseq),querylength,min_readlength);
+     /* fprintf(stderr,"You may want to build a genomic index with a smaller k-mer value using the -k flag to gmap_build\n"); */
+     *npaths = 0;
+     return (Stage3end_T *) NULL;
+
+ #ifndef HAVE_ALLOCA
+   } else if (querylength > MAX_READLENGTH) {
+     /* Without alloca, queryrc is a fixed-size array, so longer reads cannot be handled */
+     fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
+             Shortread_accession(queryseq),querylength,MAX_READLENGTH);
+     *npaths = 0;
+     return (Stage3end_T *) NULL;
+ #endif
+
+   } else {
+     if (user_maxlevel_float < 0.0) {
+       user_maxlevel = -1;
+     } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
+       user_maxlevel = (int) rint(user_maxlevel_float * (double) querylength);
+     } else {
+       user_maxlevel = (int) user_maxlevel_float;
+     }
+
+     /* Limit search on repetitive sequences */
+     queryuc_ptr = Shortread_fullpointer_uc(queryseq);
+     quality_string = Shortread_quality_string(queryseq);
+     if (check_dinucleotides(queryuc_ptr,querylength) == false) {
+       user_maxlevel = 0;
+     }
+
+     query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
+     query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
+ #ifdef HAVE_ALLOCA
+     /* queryrc is a char buffer: querylength characters plus one extra byte */
+     queryrc = (char *) ALLOCA((querylength+1)*sizeof(char));
+ #endif
+     make_complement_buffered(queryrc,queryuc_ptr,querylength);
+
+     this = Stage1_new(querylength);
+     query_lastpos = querylength - index1part;
+
+     gmap_history = History_new();
+
+     /* allvalidp is an output of read_oligos and must be set before it is
+        handed to align_end.  As in single_read_tolerant_nonstranded, align
+        only when read_oligos reports a positive count; otherwise hits
+        stays NULL and the common cleanup path below still runs. */
+     if (read_oligos(&allvalidp,this,queryuc_ptr,querylength,query_lastpos,/*genestrand*/0,
+                     /*first_read_p*/true) > 0) {
+       hits = align_end(&cutoff_level,gmap_history,this,
+                        query_compress_fwd,query_compress_rev,
+                        Shortread_accession(queryseq),queryuc_ptr,queryrc,querylength,query_lastpos,
+                        indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+                        oligoindices_major,oligoindices_minor,
+                        pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+                        user_maxlevel,indel_penalty_middle,indel_penalty_end,
+                        localsplicing_penalty,distantsplicing_penalty,min_shortend,
+                        allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+                        allvalidp,keep_floors_p,/*genestrand*/0,/*first_read_p*/true);
+     }
+
+     if ((*npaths = List_length(hits)) == 0) {
+       stage3array = (Stage3end_T *) NULL;
+     } else {
+       stage3array = (Stage3end_T *) List_to_array_out(hits,NULL); List_free(&hits); /* Return value */
+       stage3array = Stage3end_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
+                                             stage3array,maxpaths_search,queryseq,queryuc_ptr,queryrc,
+                                             query_compress_fwd,query_compress_rev,
+                                             quality_string,/*displayp*/true);
+     }
+
+     History_free(&gmap_history);
+     Compress_free(&query_compress_fwd);
+     Compress_free(&query_compress_rev);
+     Stage1_free(&this,querylength);
+     return stage3array;
+   }
+ }
+
+
+ /* Aligns one single-end read for the non-stranded modes by searching
+    genestrand +1 and genestrand +2 separately and pooling the results.
+
+    For each genestrand, align_end is run only when read_oligos returns a
+    positive count.  The pooled hits then go through
+    Stage3end_optimal_score, Stage3end_reject_trimlengths,
+    Stage3end_remove_overlaps, a second Stage3end_optimal_score and
+    Stage3end_resolve_multimapping before being converted to an array.
+
+    Sets *npaths to the number of surviving alignments and returns the
+    array from Stage3end_eval_and_sort, or NULL when *npaths is 0 (in
+    which case *first_absmq and *second_absmq are not written).
+
+    NOTE(review): cutoff_level is written only by align_end, so the value
+    used for filtering comes from the last genestrand that was aligned; if
+    neither read_oligos call succeeds it is passed on unset -- confirm the
+    downstream functions tolerate that for an empty list. */
+ static Stage3end_T *
+ single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_absmq,
+                                   Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
+                                   int indexdb_size_threshold, Floors_T *floors_array,
+                                   double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+                                   bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
+                                   int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
+                                   Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+                                   Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+                                   Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+                                   bool keep_floors_p) {
+   Stage3end_T *result = (Stage3end_T *) NULL;
+   History_T gmap_history;
+   List_T pooled;
+   List_T strand_hits[2] = {NULL, NULL};  /* [0] is genestrand +1, [1] is genestrand +2 */
+   T strand_stage1[2] = {NULL, NULL};     /* same indexing as strand_hits */
+   int genestrand;
+   int user_maxlevel;
+   int querylength, query_lastpos, cutoff_level;
+   char *queryuc_ptr, *quality_string;
+   Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
+   bool allvalidp;
+
+ #ifdef HAVE_ALLOCA
+   char *queryrc;
+ #else
+   char queryrc[MAX_READLENGTH+1];
+ #endif
+
+   querylength = Shortread_fulllength(queryseq);
+
+   /* Guard: read is too short */
+   if (querylength < min_readlength) {
+     fprintf(stderr,"Read %s has length %d < min_readlength %d. Skipping\n",
+             Shortread_accession(queryseq),querylength,min_readlength);
+     /* fprintf(stderr,"You may want to build a genomic index with a smaller k-mer value using the -k flag to gmap_build\n"); */
+     *npaths = 0;
+     return (Stage3end_T *) NULL;
+   }
+
+ #ifndef HAVE_ALLOCA
+   /* Guard: read does not fit in the fixed-size queryrc buffer */
+   if (querylength > MAX_READLENGTH) {
+     fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
+             Shortread_accession(queryseq),querylength,MAX_READLENGTH);
+     *npaths = 0;
+     return (Stage3end_T *) NULL;
+   }
+ #endif
+
+   /* Convert the user's float setting into an integer level */
+   if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
+     user_maxlevel = (int) rint(user_maxlevel_float * (double) querylength);
+   } else if (user_maxlevel_float < 0.0) {
+     user_maxlevel = -1;
+   } else {
+     user_maxlevel = (int) user_maxlevel_float;
+   }
+
+   strand_stage1[0] = Stage1_new(querylength);
+   strand_stage1[1] = Stage1_new(querylength);
+
+   queryuc_ptr = Shortread_fullpointer_uc(queryseq);
+   quality_string = Shortread_quality_string(queryseq);
+   query_lastpos = querylength - index1part;
+
+   /* Limit search on repetitive sequences */
+   if (check_dinucleotides(queryuc_ptr,querylength) == false) {
+     user_maxlevel = 0;
+   }
+
+   query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
+   query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
+   gmap_history = History_new();
+ #ifdef HAVE_ALLOCA
+   queryrc = (char *) ALLOCA((querylength+1)*sizeof(char));
+ #endif
+   make_complement_buffered(queryrc,queryuc_ptr,querylength);
+
+   /* Genestrand +1 first, then +2; each gets its own Stage1 object */
+   for (genestrand = +1; genestrand <= +2; genestrand++) {
+     if (read_oligos(&allvalidp,strand_stage1[genestrand-1],queryuc_ptr,querylength,query_lastpos,genestrand,
+                     /*first_read_p*/true) > 0) {
+       strand_hits[genestrand-1] = align_end(&cutoff_level,gmap_history,strand_stage1[genestrand-1],
+                                             query_compress_fwd,query_compress_rev,
+                                             Shortread_accession(queryseq),queryuc_ptr,queryrc,querylength,query_lastpos,
+                                             indexdb_fwd,indexdb_rev,indexdb_size_threshold,
+                                             floors_array,oligoindices_major,oligoindices_minor,
+                                             pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+                                             user_maxlevel,indel_penalty_middle,indel_penalty_end,
+                                             localsplicing_penalty,distantsplicing_penalty,min_shortend,
+                                             allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+                                             allvalidp,keep_floors_p,genestrand,/*first_read_p*/true);
+     }
+   }
+
+   /* Pool both strands, then filter */
+   pooled = List_append(strand_hits[0],strand_hits[1]);
+   pooled = Stage3end_optimal_score(pooled,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+                                    querylength,/*keep_gmap_p*/true,/*finalp*/true);
+   pooled = Stage3end_reject_trimlengths(pooled);
+   pooled = Stage3end_remove_overlaps(pooled,/*finalp*/true);
+   pooled = Stage3end_optimal_score(pooled,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+                                    querylength,/*keep_gmap_p*/false,/*finalp*/true);
+   pooled = Stage3end_resolve_multimapping(pooled);
+
+   *npaths = List_length(pooled);
+   if (*npaths != 0) {
+     result = (Stage3end_T *) List_to_array_out(pooled,NULL);
+     List_free(&pooled);
+     result = Stage3end_eval_and_sort(npaths,first_absmq,second_absmq,
+                                      result,maxpaths_search,queryseq,queryuc_ptr,queryrc,
+                                      query_compress_fwd,query_compress_rev,
+                                      quality_string,/*displayp*/true);
+   }
+
+   History_free(&gmap_history);
+   Compress_free(&query_compress_fwd);
+   Compress_free(&query_compress_rev);
+   Stage1_free(&strand_stage1[1],querylength);
+   Stage1_free(&strand_stage1[0],querylength);
+   return result;
+ }
+
+
+ /* Public entry point for aligning a single-end read.  Dispatches on the
+    file-level `mode` setting: STANDARD and the *_STRANDED modes go to
+    single_read, the *_NONSTRANDED modes go to
+    single_read_tolerant_nonstranded.  All arguments are forwarded
+    unchanged, and the callee's return value is returned as is.  An
+    unrecognized mode is reported on stderr and the process aborts. */
+ Stage3end_T *
+ Stage1_single_read (int *npaths, int *first_absmq, int *second_absmq,
+                     Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
+                     int indexdb_size_threshold, Floors_T *floors_array,
+                     double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+                     bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
+                     int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
+                     Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+                     Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+                     Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+                     bool keep_floors_p) {
+
+   switch (mode) {
+   case STANDARD:
+   case CMET_STRANDED:
+   case ATOI_STRANDED:
+     return single_read(npaths,first_absmq,second_absmq,
+                        queryseq,indexdb_fwd,indexdb_rev,indexdb_size_threshold,
+                        floors_array,user_maxlevel_float,
+                        indel_penalty_middle,indel_penalty_end,
+                        allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+                        localsplicing_penalty,distantsplicing_penalty,min_shortend,
+                        oligoindices_major,oligoindices_minor,
+                        pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p);
+
+   case CMET_NONSTRANDED:
+   case ATOI_NONSTRANDED:
+     return single_read_tolerant_nonstranded(npaths,first_absmq,second_absmq,queryseq,
+                                             indexdb_fwd,indexdb_rev,indexdb_size_threshold,
+                                             floors_array,user_maxlevel_float,
+                                             indel_penalty_middle,indel_penalty_end,
+                                             allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+                                             localsplicing_penalty,distantsplicing_penalty,min_shortend,
+                                             oligoindices_major,oligoindices_minor,
+                                             pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p);
+
+   default:
+     fprintf(stderr,"Do not recognize mode %d\n",mode);
+     abort();
+   }
+ }
- } else if (Stage3end_anomalous_splice_p(hit3) == true) {
- /* Go from genomicend */
- debug13(printf("Anomalous splice\n"));
- genomicbound = subtract_bounded(Stage3end_genomicend(hit3),querylength,chroffset);
- } else {
- genomicbound = subtract_bounded(Stage3end_genomicstart(hit3),querylength,chroffset);
-
- /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
- if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
- Stage3end_genomicbound_from_start(&genomicbound2,hit3,overlap,chroffset) == true) {
- debug13(printf("Found overlap of %d\n",overlap));
- if (genomicbound2 < genomicbound) {
- zero_offset = genomicbound - genomicbound2;
- genomicbound = genomicbound2;
- }
- }
- }
- debug13(printf("Case 4: hit3 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
- Stage3end_hittype_string(hit3),
- Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
- Stage3end_sensedir(hit3),genomicbound - chroffset));
+ /* #define HITARRAY_SHORTENDSPLICING 4 */
+ /* #define HITARRAY_DISTANTSPLICING 4 */
- knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
- knownsplice_limit_high = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
- segmentend = add_bounded(Stage3end_genomicstart(hit3),pairmax,chrhigh);
-#ifdef LONG_ENDSPLICES
- mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
-#else
- mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chrhigh);
-#endif
- close_mappingend_last = middle_mappingend_last = Stage3end_genomicstart(hit3);
- close_mappingend_greedy = middle_mappingend_greedy = segmentend;
+ static List_T
+ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3,
+ Shortread_T queryseq5, Shortread_T queryseq3,
+ char *queryuc_ptr, int querylength, int query_lastpos,
+ #ifdef END_KNOWNSPLICING_SHORTCUT
+ char *queryrc, bool invertedp,
+ #endif
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ struct Segment_T *plus_segments, int plus_nsegments,
+ struct Segment_T *minus_segments, int minus_nsegments,
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ Chrpos_T pairmax, Chrpos_T shortsplicedist, int user_maxlevel,
+ int genestrand, bool first_read_p) {
+ List_T hits = NULL;
+ int sensedir, sense_try;
+ int overlap;
+
+ int zero_offset = 0;
+ Univcoord_T segmentstart, segmentend;
+ Univcoord_T genomicbound, genomicbound2, mappingstart, mappingend,
+ chroffset, chrhigh, mappingpos;
+ #ifdef USE_GREEDY
+ Univcoord_T close_mappingstart_greedy, close_mappingend_greedy,
+ middle_mappingstart_greedy, middle_mappingend_greedy;
+ #endif
+ Univcoord_T close_mappingstart_last, close_mappingend_last,
+ middle_mappingstart_last, middle_mappingend_last;
+ Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+ Univcoord_T close_knownsplice_limit_low, close_knownsplice_limit_high;
+ Chrpos_T chrlength;
+ Chrnum_T chrnum;
+ bool close_mappingstart_p = false, close_mappingend_p = false;
+ bool middle_mappingstart_p = false, middle_mappingend_p = false;
+ bool fallback_mappingstart_p, fallback_mappingend_p;
+ bool good_start_p, good_end_p, watsonp, favor_right_p;
+
+ int starti, endi, i;
+
+ if (hit3 == NULL) {
+ /* Both events are tested by Stage3end_anomalous_splice_p */
+ if ((chrnum = Stage3end_chrnum(hit5)) == 0) {
+ /* Translocation */
+ return (List_T) NULL;
+
+ } else if (Stage3end_hittype(hit5) == SAMECHR_SPLICE) {
+ /* A genomic event that doesn't get reflected in chrnum */
+ return (List_T) NULL;
+
+ } else if ((watsonp = Stage3end_plusp(hit5)) == true) {
+ chroffset = Stage3end_chroffset(hit5);
+ chrhigh = Stage3end_chrhigh(hit5);
+ chrlength = Stage3end_chrlength(hit5);
+
+ if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+ /* Go from genomicstart */
+ debug13(printf("Found primers\n"));
+ genomicbound = Stage3end_genomicstart(hit5);
+
+ } else if (Stage3end_anomalous_splice_p(hit5) == true) {
+ /* Go from genomicstart */
+ debug13(printf("Anomalous splice\n"));
+ genomicbound = Stage3end_genomicstart(hit5);
+
+ } else {
+ genomicbound = Stage3end_genomicend(hit5);
+
+ #if 0
+ /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
+ if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
+ Stage3end_genomicbound_from_end(&genomicbound2,hit5,overlap,chroffset) == true) {
+ debug13(printf("Found overlap of %d\n",overlap));
+ if (genomicbound2 < genomicbound) {
+ zero_offset = genomicbound - genomicbound2;
+ genomicbound = genomicbound2;
+ }
+ }
+ #endif
+ }
+
+ debug13(printf("Case 1: hit5 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
+ Stage3end_hittype_string(hit5),
+ Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
+ Stage3end_sensedir(hit5),genomicbound - chroffset));
+
+ knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
+ knownsplice_limit_high = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
+ segmentend = add_bounded(Stage3end_genomicend(hit5),pairmax,chrhigh);
+ #ifdef LONG_ENDSPLICES
+ mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
+ #else
+ mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chrhigh);
+ #endif
+ debug13(printf("Original bounds E: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingend %u\n",
+ knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingend - chroffset));
+
+ close_mappingend_last = middle_mappingend_last = Stage3end_genomicend(hit5);
+ #ifdef USE_GREEDY
+ close_mappingend_greedy = middle_mappingend_greedy = segmentend;
+ #endif
+
+ if (plus_nsegments > 0) {
+ /* Use segments to bound */
+ debug13(printf("Finding segments from segmentstart %u to segmentend %u (plus_nsegments %d)\n",
+ segmentstart - chroffset,segmentend - chroffset,plus_nsegments));
+ starti = endi = -1;
+ i = binary_search_segments(0,plus_nsegments-1,plus_segments,segmentstart);
+ while (i < plus_nsegments - 1 && plus_segments[i].diagonal == (Univcoord_T) -1) {
+ i++;
+ }
+ starti = i;
+ while (plus_segments[i].diagonal < segmentend) {
+ endi = i;
+ i++;
+ }
+ if (starti >= 0 && endi >= 0) {
+ debug13(printf("starti = %d, endi = %d\n",starti,endi));
+ assert(starti <= endi);
+ for (i = starti; i <= endi; i++) {
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
+ plus_segments[i].querypos5,plus_segments[i].querypos3));
+ if (query_lastpos - plus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 1. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
+ #ifdef USE_GREEDY
+ if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
+ mappingpos > genomicbound) {
+ middle_mappingend_greedy = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
+ }
+ #endif
+
+ #ifdef LONG_ENDSPLICES
+ if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
+ }
+ #else
+ if ((mappingpos = plus_segments[i].diagonal) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
+ }
+ #endif
+
+ } else {
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
+ #ifdef USE_GREEDY
+ if ((mappingpos = plus_segments[i].diagonal) < close_mappingend_greedy &&
+ mappingpos > genomicbound) {
+ close_mappingend_greedy = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
+ }
+ #endif
+ if ((mappingpos = plus_segments[i].diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
+ }
+ }
- if (minus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u (minus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,minus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,minus_nsegments-1,minus_segments,segmentstart);
- while (i < minus_nsegments - 1 && minus_segments[i].diagonal == (Univcoord_T) -1) {
- i++;
- }
- starti = i;
- while (minus_segments[i].diagonal < segmentend) {
- endi = i;
- i++;
- }
- if (starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti <= endi);
- for (i = starti; i <= endi; i++) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
- minus_segments[i].querypos5,minus_segments[i].querypos3));
- if (minus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 4. Missing start of query, so there could be a middle splice */
- debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
- mappingpos > genomicbound) {
- middle_mappingend_greedy = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
- }
-#ifdef LONG_ENDSPLICES
- if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend to %u\n",middle_mappingend_last - chroffset));
- }
-#else
- if (mappingpos > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend to %u\n",middle_mappingend_last - chroffset));
- }
-#endif
+ #ifdef USE_GREEDY
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_greedy;
+ close_mappingend_greedy = middle_mappingend_greedy;
+ close_mappingend_p = true;
+ }
+ #else
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_last;
+ close_mappingend_last = middle_mappingend_last;
+ close_mappingend_p = true;
+ }
+ #endif
+ #ifdef USE_GREEDY
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ } else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
+ knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ mappingend = close_mappingend_last;
+ }
+ #else
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ }
+ #endif
+
+ if (close_mappingend_p == false) {
+ fallback_mappingend_p = false;
+ #ifdef USE_GREEDY
+ } else if (mappingend <= close_mappingend_greedy) {
+ fallback_mappingend_p = false;
+ #endif
+ } else {
+ debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
+ fallback_mappingend_p = true;
+ }
+ }
+ }
+
+ favor_right_p = false;
+
+ } else {
+ chroffset = Stage3end_chroffset(hit5);
+ chrhigh = Stage3end_chrhigh(hit5);
+ chrlength = Stage3end_chrlength(hit5);
+
+ if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+ /* Go from genomicstart */
+ debug13(printf("Found primers\n"));
+ genomicbound = Stage3end_genomicstart(hit5);
+
+ } else if (Stage3end_anomalous_splice_p(hit5) == true) {
+ /* Go from genomicstart */
+ debug13(printf("Anomalous splice\n"));
+ genomicbound = Stage3end_genomicstart(hit5);
+
+ } else {
+ genomicbound = Stage3end_genomicend(hit5);
+
+ #if 0
+ /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
+ if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
+ Stage3end_genomicbound_from_end(&genomicbound2,hit5,overlap,chroffset) == true) {
+ debug13(printf("Found overlap of %d\n",overlap));
+ if (genomicbound2 > genomicbound) {
+ zero_offset = genomicbound2 - genomicbound;
+ genomicbound = genomicbound2;
+ }
+ }
+ #endif
+ }
+
+ debug13(printf("Case 2: hit5 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
+ Stage3end_hittype_string(hit5),
+ Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
+ Stage3end_sensedir(hit5),genomicbound - chroffset));
+
+ knownsplice_limit_high = mappingend = segmentend = genomicbound;
+ knownsplice_limit_low = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
+ segmentstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax,chroffset);
+ #ifdef LONG_ENDSPLICES
+ mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
+ #else
+ mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chroffset);
+ #endif
+ debug13(printf("Original bounds F: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingstart %u\n",
+ knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingstart - chroffset));
+
+ close_mappingstart_last = middle_mappingstart_last = Stage3end_genomicend(hit5);
+ #ifdef USE_GREEDY
+ close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
+ #endif
+
+ if (minus_nsegments > 0) {
+ /* Use segments to bound */
+ debug13(printf("Finding segments from segmentstart %u to segmentend %u (minus_nsegments %d)\n",
+ segmentstart - chroffset,segmentend - chroffset,minus_nsegments));
+ starti = endi = -1;
+ i = binary_search_segments(0,minus_nsegments-1,minus_segments,segmentend);
+ while (i >= 0 && minus_segments[i].diagonal >= segmentend) {
+ i--;
+ }
+ starti = i;
+ while (i >= 0 && minus_segments[i].diagonal > segmentstart) {
+ if (minus_segments[i].diagonal < (Univcoord_T) -1) {
+ endi = i;
+ }
+ i--;
+ }
+ if (starti >= 0 && endi >= 0) {
+ debug13(printf("starti = %d, endi = %d\n",starti,endi));
+ assert(starti >= endi);
+ for (i = starti; i >= endi; i--) {
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
+ minus_segments[i].querypos5,minus_segments[i].querypos3));
+ if (query_lastpos - minus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 2. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
+ #ifdef USE_GREEDY
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
+ mappingpos < genomicbound) {
+ middle_mappingstart_greedy = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
+ }
+ #endif
+ #ifdef LONG_ENDSPLICES
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+ #else
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+ #endif
+
+ } else {
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
+ #ifdef USE_GREEDY
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
+ mappingpos < genomicbound) {
+ close_mappingstart_greedy = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart greedy to %u\n",close_mappingstart_greedy - chroffset));
+ }
+ #endif
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
+ }
+ }
+ }
- } else {
- debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
- minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- if ((mappingpos = minus_segments[i].diagonal) < close_mappingend_greedy &&
- mappingpos > genomicbound) {
- close_mappingend_greedy = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
- }
- if (mappingpos > close_mappingend_last) {
- /* Use > for NOT_GREEDY */
- close_mappingend_last = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
- }
- }
- }
+ #ifdef USE_GREEDY
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_greedy;
+ close_mappingstart_greedy = middle_mappingstart_greedy;
+ close_mappingstart_p = true;
+ }
+ #else
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_last;
+ close_mappingstart_last = middle_mappingstart_last;
+ close_mappingstart_p = true;
+ }
+ #endif
+ #ifdef USE_GREEDY
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
+ } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
+ knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ mappingstart = close_mappingstart_last;
+ }
+ #else
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
+ }
+ #endif
+ if (close_mappingstart_p == false) {
+ fallback_mappingstart_p = false;
+ #ifdef USE_GREEDY
+ } else if (mappingstart >= close_mappingstart_greedy) {
+ fallback_mappingstart_p = false;
+ #endif
+ } else {
+ debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
+ fallback_mappingstart_p = true;
+ }
+ }
+ }
+
+ favor_right_p = false;
+ }
+
+ if ((sensedir = Stage3end_sensedir(hit5)) == SENSE_FORWARD) {
+ sense_try = +1;
+ } else if (sensedir == SENSE_ANTI) {
+ sense_try = -1;
+ } else {
+ sense_try = 0;
+ }
+
+ } else if (hit5 == NULL) {
+ /* Both events are tested by Stage3end_anomalous_splice_p */
+ if ((chrnum = Stage3end_chrnum(hit3)) == 0) {
+ /* Translocation */
+ return (List_T) NULL;
+
+ } else if (Stage3end_hittype(hit3) == SAMECHR_SPLICE) {
+ /* A genomic event that doesn't get reflected in chrnum */
+ return (List_T) NULL;
+
+ } else if ((watsonp = Stage3end_plusp(hit3)) == true) {
+ chroffset = Stage3end_chroffset(hit3);
+ chrhigh = Stage3end_chrhigh(hit3);
+ chrlength = Stage3end_chrlength(hit3);
+
+ if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+ /* Go from genomicend */
+ debug13(printf("Found primers\n"));
+ genomicbound = Stage3end_genomicend(hit3);
+
+ } else if (Stage3end_anomalous_splice_p(hit3) == true) {
+ /* Go from genomicend */
+ debug13(printf("Anomalous splice\n"));
+ genomicbound = Stage3end_genomicend(hit3);
+
+ } else {
+ genomicbound = Stage3end_genomicstart(hit3);
+
+ #if 0
+ /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
+ if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
+ Stage3end_genomicbound_from_start(&genomicbound2,hit3,overlap,chroffset) == true) {
+ debug13(printf("Found overlap of %d\n",overlap));
+ if (genomicbound2 > genomicbound) {
+ zero_offset = genomicbound2 - genomicbound;
+ genomicbound = genomicbound2;
+ }
+ }
+ #endif
+ }
+
+ debug13(printf("Case 3: hit3 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
+ Stage3end_hittype_string(hit3),
+ Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
+ Stage3end_sensedir(hit3),genomicbound - chroffset));
+
+ knownsplice_limit_high = mappingend = segmentend = genomicbound;
+ knownsplice_limit_low = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
+ segmentstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax,chroffset);
+ #ifdef LONG_ENDSPLICES
+ mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
+ #else
+ mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chroffset);
+ #endif
+
+ close_mappingstart_last = middle_mappingstart_last = Stage3end_genomicstart(hit3);
+ #ifdef USE_GREEDY
+ close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
+ #endif
+
+ if (plus_nsegments > 0) {
+ /* Use segments to bound */
+ debug13(printf("Finding segments from segmentstart %u to segmentend %u (plus_nsegments %d)\n",
+ segmentstart - chroffset,segmentend - chroffset,plus_nsegments));
+ starti = endi = -1;
+ i = binary_search_segments(0,plus_nsegments-1,plus_segments,segmentend);
+ while (i >= 0 && plus_segments[i].diagonal >= segmentend) {
+ i--;
+ }
+ starti = i;
+ while (i >= 0 && plus_segments[i].diagonal > segmentstart) {
+ if (plus_segments[i].diagonal < (Univcoord_T) -1) {
+ endi = i;
+ }
+ i--;
+ }
+ if (starti >= 0 && endi >= 0) {
+ debug13(printf("starti = %d, endi = %d\n",starti,endi));
+ assert(starti >= endi);
+ for (i = starti; i >= endi; i--) {
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
+ plus_segments[i].querypos5,plus_segments[i].querypos3));
+ if (plus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 3. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
+ #ifdef USE_GREEDY
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
+ mappingpos < genomicbound) {
+ middle_mappingstart_greedy = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
+ }
+ #endif
+ #ifdef LONG_ENDSPLICES
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+ #else
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+ #endif
+
+ } else {
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
+ #ifdef USE_GREEDY
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
+ mappingpos < genomicbound) {
+ close_mappingstart_greedy = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart greedy to %u\n",close_mappingstart_greedy - chroffset));
+ }
+ #endif
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
+ }
+ }
+ }
- if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
- } else if (middle_mappingend_p == true) {
- debug13(printf("Using middle mappingend\n"));
- close_knownsplice_limit_high = middle_mappingend_greedy;
- close_mappingend_greedy = middle_mappingend_greedy;
- close_mappingend_p = true;
- }
- if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
- knownsplice_limit_high = middle_mappingend_last;
- mappingend = middle_mappingend_last;
- } else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
- knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
- mappingend = close_mappingend_last;
- }
- if (close_mappingend_p == false) {
- fallback_mappingend_p = false;
- } else if (mappingend <= close_mappingend_greedy) {
- fallback_mappingend_p = false;
- } else {
- debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
- fallback_mappingend_p = true;
+ #ifdef USE_GREEDY
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_greedy;
+ close_mappingstart_greedy = middle_mappingstart_greedy;
+ close_mappingstart_p = true;
+ }
+ #else
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_last;
+ close_mappingstart_last = middle_mappingstart_last;
+ close_mappingstart_p = true;
+ }
+ #endif
+ #ifdef USE_GREEDY
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
+ } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
+ knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ mappingstart = close_mappingstart_last;
+ }
+ #else
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
+ }
+ #endif
+ if (close_mappingstart_p == false) {
+ fallback_mappingstart_p = false;
+ #ifdef USE_GREEDY
+ } else if (mappingstart >= close_mappingstart_greedy) {
+ fallback_mappingstart_p = false;
+ #endif
+ } else {
+ debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
+ fallback_mappingstart_p = true;
+ }
+ }
+ }
+
+ favor_right_p = true;
+
+ } else {
+ chroffset = Stage3end_chroffset(hit3);
+ chrhigh = Stage3end_chrhigh(hit3);
+ chrlength = Stage3end_chrlength(hit3);
+
+ if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+ /* Go from genomicend */
+ debug13(printf("Found primers\n"));
+ genomicbound = Stage3end_genomicend(hit3);
+
+ } else if (Stage3end_anomalous_splice_p(hit3) == true) {
+ /* Go from genomicend */
+ debug13(printf("Anomalous splice\n"));
+ genomicbound = Stage3end_genomicend(hit3);
+
+ } else {
+ genomicbound = Stage3end_genomicstart(hit3);
+
+ #if 0
+ /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
+ if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
+ Stage3end_genomicbound_from_start(&genomicbound2,hit3,overlap,chroffset) == true) {
+ debug13(printf("Found overlap of %d\n",overlap));
+ if (genomicbound2 < genomicbound) {
+ zero_offset = genomicbound - genomicbound2;
+ genomicbound = genomicbound2;
+ }
+ }
+ #endif
+ }
+
+ debug13(printf("Case 4: hit3 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
+ Stage3end_hittype_string(hit3),
+ Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
+ Stage3end_sensedir(hit3),genomicbound - chroffset));
+
+ knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
+ knownsplice_limit_high = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
+ segmentend = add_bounded(Stage3end_genomicstart(hit3),pairmax,chrhigh);
+ #ifdef LONG_ENDSPLICES
+ mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
+ #else
+ mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chrhigh);
+ #endif
+
+ close_mappingend_last = middle_mappingend_last = Stage3end_genomicstart(hit3);
+ #ifdef USE_GREEDY
+ close_mappingend_greedy = middle_mappingend_greedy = segmentend;
+ #endif
+
+ if (minus_nsegments > 0) {
+ /* Use segments to bound */
+ debug13(printf("Finding segments from segmentstart %u to segmentend %u (minus_nsegments %d)\n",
+ segmentstart - chroffset,segmentend - chroffset,minus_nsegments));
+ starti = endi = -1;
+ i = binary_search_segments(0,minus_nsegments-1,minus_segments,segmentstart);
+ while (i < minus_nsegments - 1 && minus_segments[i].diagonal == (Univcoord_T) -1) {
+ i++;
+ }
+ starti = i;
+ while (minus_segments[i].diagonal < segmentend) {
+ endi = i;
+ i++;
+ }
+ if (starti >= 0 && endi >= 0) {
+ debug13(printf("starti = %d, endi = %d\n",starti,endi));
+ assert(starti <= endi);
+ for (i = starti; i <= endi; i++) {
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
+ minus_segments[i].querypos5,minus_segments[i].querypos3));
+ if (minus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 4. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
+ #ifdef USE_GREEDY
+ if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
+ mappingpos > genomicbound) {
+ middle_mappingend_greedy = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
+ }
+ #endif
+ #ifdef LONG_ENDSPLICES
+ if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend to %u\n",middle_mappingend_last - chroffset));
+ }
+ #else
+ if ((mappingpos = minus_segments[i].diagonal) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend to %u\n",middle_mappingend_last - chroffset));
+ }
+ #endif
+
+ } else {
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
+ #ifdef USE_GREEDY
+ if ((mappingpos = minus_segments[i].diagonal) < close_mappingend_greedy &&
+ mappingpos > genomicbound) {
+ close_mappingend_greedy = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
+ }
+ #endif
+ if ((mappingpos = minus_segments[i].diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
+ }
+ }
+
+ #ifdef USE_GREEDY
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_greedy;
+ close_mappingend_greedy = middle_mappingend_greedy;
+ close_mappingend_p = true;
+ }
+ #else
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_last;
+ close_mappingend_last = middle_mappingend_last;
+ close_mappingend_p = true;
+ }
+ #endif
+ #ifdef USE_GREEDY
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ } else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
+ knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ mappingend = close_mappingend_last;
+ }
+ #else
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ }
+ #endif
+ if (close_mappingend_p == false) {
+ fallback_mappingend_p = false;
+ #ifdef USE_GREEDY
+ } else if (mappingend <= close_mappingend_greedy) {
+ fallback_mappingend_p = false;
+ #endif
+ } else {
+ debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
+ fallback_mappingend_p = true;
}
}
}
@@ -15270,7 +17295,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
favor_right_p = true;
}
- if ((sensedir = Stage3end_sensedir_nonamb(hit3)) == SENSE_FORWARD) {
+ if ((sensedir = Stage3end_sensedir(hit3)) == SENSE_FORWARD) {
sense_try = +1;
} else if (sensedir == SENSE_ANTI) {
sense_try = -1;
@@ -15289,14 +17314,14 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
if (close_mappingstart_p == true && close_mappingend_p == true) {
debug13(printf("Halfmapping: Running gmap with close mappingstart and close mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,
- hits,queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,close_mappingstart_greedy,close_mappingend_greedy,
- close_knownsplice_limit_low,close_knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,close_mappingstart_last,close_mappingend_last,
+ close_knownsplice_limit_low,close_knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
if (good_start_p == true && good_end_p == true) {
/* Success */
@@ -15305,52 +17330,52 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
} else if (/* require both ends to be good */ 0 && good_start_p == true) {
if (fallback_mappingend_p == true) {
debug13(printf("Halfmapping: Re-running gmap with close mappingstart only\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,
- hits,queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,close_mappingstart_greedy,mappingend,
- close_knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
+ close_knownsplice_limit_low,knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
}
} else if (/* require both ends to be good */ 0 && good_end_p == true) {
if (fallback_mappingstart_p == true) {
debug13(printf("Halfmapping: Re-running gmap with close mappingend only\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,
- hits,queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_greedy,
- knownsplice_limit_low,close_knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
+ knownsplice_limit_low,close_knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
}
} else {
if (fallback_mappingstart_p == true && fallback_mappingend_p == true) {
debug13(printf("Halfmapping: Re-running gmap with far mappingstart and mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,
- hits,queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,mappingstart,mappingend,
- knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
}
}
} else if (close_mappingstart_p == true) {
debug13(printf("Halfmapping: Running gmap with close mappingstart\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,
- hits,queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,close_mappingstart_greedy,mappingend,
- close_knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
+ close_knownsplice_limit_low,knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
if (good_start_p == true && /* require both ends to be good */ good_end_p == true) {
/* Success */
@@ -15358,26 +17383,26 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
debug13(printf("Skipping re-run of gmap\n"));
} else if (fallback_mappingstart_p == true) {
debug13(printf("Halfmapping: Re-running gmap with far mappingstart\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,
- hits,queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,mappingstart,mappingend,
- knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
}
} else if (close_mappingend_p == true) {
debug13(printf("Halfmapping: Running gmap with close mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,
- hits,queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_greedy,
- knownsplice_limit_low,close_knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
+ knownsplice_limit_low,close_knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
if (good_end_p == true && /* require both ends to be good */ good_start_p == true) {
/* Success */
@@ -15385,26 +17410,26 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
debug13(printf("Skipping re-run of gmap\n"));
} else if (fallback_mappingend_p == true) {
debug13(printf("Halfmapping: Re-running gmap with far mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,
- hits,queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,mappingstart,mappingend,
- knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
}
} else {
debug13(printf("Halfmapping: Running gmap with far mappingstart and mappingend\n"));
- hits = run_gmap(&good_start_p,&good_end_p,gmap_history,
- hits,queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,mappingstart,mappingend,
- knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
}
return hits;
@@ -15427,11 +17452,10 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int pairmax, int user_maxlevel_5, int user_maxlevel_3,
- int cutoff_level_5, int cutoff_level_3,
+ Chrpos_T pairmax, int cutoff_level_5, int cutoff_level_3,
Pairtype_T pairtype, bool expect_concordant_p, bool redo_for_sense_p) {
Stage3pair_T newpair, stage3pair;
- List_T gmap5_hits = NULL, gmap3_hits = NULL, good_gmap5_hits = NULL, good_gmap3_hits = NULL;
+ List_T gmap5_hits = NULL, gmap3_hits = NULL;
Stage3end_T hit5, hit3, gmap5, gmap3;
List_T p, a, b, rest;
int genestrand;
@@ -15440,7 +17464,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
bool replacedp;
- debug13(printf("Sorting hitpairs by nmatches\n"));
+ debug13(printf("Sorting %d hitpairs by nmatches\n",List_length(result)));
result = Stage3pair_sort_bymatches(result);
for (p = result, i = 0; p != NULL && i < max_gmap_improvement; p = p->rest, i++) {
@@ -15454,7 +17478,9 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
i,Stage3end_hittype_string(hit5),Stage3end_hittype_string(hit3)));
/* Was querylength5 - Stage3end_matches(hit5) > 5 */
- debug13(printf("**Looking at hit5\n"));
+ debug13(printf("Looking at hit5 with nmismatches %d - %d ?<= cutoff_level %d\n",
+ querylength5,Stage3end_nmatches_posttrim(hit5),cutoff_level_5));
+#if 0
if (Stage3end_sarrayp(hit5) == true && redo_for_sense_p == false) {
/* Skip */
debug13(printf("Skipping hit5 from sarray search\n"));
@@ -15463,215 +17489,68 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
/* Skip */
debug13(printf("Skipping hit5 of type GMAP\n"));
-#if 0
/* Don't skip on final align_concordant_with_gmap */
} else if (Stage3end_hittype(hit5) == TERMINAL) {
/* Skip */
debug13(printf("Skipping hit5 of type TERMINAL\n"));
+
+ } /* else */
#endif
- } else if (querylength5 - Stage3end_nmatches_posttrim(hit5) <= user_maxlevel_5) {
+ if (querylength5 - Stage3end_nmatches_posttrim(hit5) <= cutoff_level_5) {
/* Skip */
- debug13(printf("Skipping hit with nmismatches %d - %d <= user_maxlevel %d\n",
- querylength5,Stage3end_nmatches_posttrim(hit5),user_maxlevel_5));
+ debug13(printf("Skipping hit5 with nmismatches %d - %d <= cutoff_level %d\n",
+ querylength5,Stage3end_nmatches_posttrim(hit5),cutoff_level_5));
- } else if (expect_concordant_p == false) {
- debug13(printf("expect_concordant_p is false, so running GMAP single end on 5'\n"));
- gmap5_hits = align_single_hit_with_gmap(gmap_history_5,hit5,
- /*extend_left_p*/true,/*extend_right_p*/true,
- queryuc_ptr_5,querylength5,query5_lastpos,
+ } else {
+ if ((gmap5 = align_single_hit_with_gmap(hit5,queryuc_ptr_5,querylength5,
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc5,Shortread_invertedp(queryseq5),
#endif
- query5_compress_fwd,query5_compress_rev,
- plus_segments_genestrand_5[genestrand],
- plus_nsegments_genestrand_5[genestrand],
- minus_segments_genestrand_5[genestrand],
- minus_nsegments_genestrand_5[genestrand],
- oligoindices_major,oligoindices_minor,
+ oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,genestrand,/*first_read_p*/true);
-
- missing_hit = querylength5 - Stage3end_nmatches_posttrim(hit5);
- for (b = gmap5_hits; b != NULL; b = List_next(b)) {
- gmap5 = (Stage3end_T) List_head(b);
- missing_gmap = querylength5 - Stage3end_nmatches_posttrim(gmap5);
- if (Stage3end_score(gmap5) > cutoff_level_5 + gmap_allowance) {
- debug13(printf("Score is only %d vs cutoff_level_5 %d\n",Stage3end_score(gmap5),cutoff_level_5));
- Stage3end_free(&gmap5);
- } else if (missing_gmap < missing_hit/2) {
- debug13(printf("GMAP with %d matches, %d missing is significantly better than 5' hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap5),missing_gmap,Stage3end_nmatches_posttrim(hit5),missing_hit));
- good_gmap5_hits = List_push(good_gmap5_hits,(void *) gmap5);
- Stage3end_set_improved_by_gmap(hit5);
- } else {
- debug13(printf("GMAP with %d matches, %d missing is not significantly better than 5' hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap5),missing_gmap,Stage3end_nmatches_posttrim(hit5),missing_hit));
- Stage3end_free(&gmap5);
- }
- }
- List_free(&gmap5_hits);
-
- } else if ((redo_for_sense_p == true && Stage3end_sensedir(hit5) == 0) ||
- Stage3end_terminal_trim(hit5) > GMAP_TERMINAL_TRIM ||
- Stage3end_contains_known_splicesite(hit5) == true) {
- debug13(printf("To correct hit5 terminalp %d or known_splicesite %d, running GMAP on 5' to match with 3' end\n",
- Stage3end_hittype(hit5) == TERMINAL,
- Stage3end_contains_known_splicesite(hit5)));
-
- /* Want high quality because we already have a pretty good answer */
- gmap5_hits = align_halfmapping_with_gmap(gmap_history_5,/*hit5*/NULL,hit3,
- queryseq5,queryseq3,
- queryuc_ptr_5,/*querylength*/querylength5,query5_lastpos,
-#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc5,Shortread_invertedp(queryseq5),
-#endif
- query5_compress_fwd,query5_compress_rev,
- plus_segments_genestrand_5[genestrand],
- plus_nsegments_genestrand_5[genestrand],
- minus_segments_genestrand_5[genestrand],
- minus_nsegments_genestrand_5[genestrand],
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- pairmax,shortsplicedist,user_maxlevel_5,genestrand,
- /*first_read_p*/true);
-
- missing_hit = querylength5 - Stage3end_nmatches_posttrim(hit5);
- for (a = gmap5_hits; a != NULL; a = List_next(a)) {
- gmap5 = (Stage3end_T) List_head(a);
- missing_gmap = querylength5 - Stage3end_nmatches_posttrim(gmap5);
- if (Stage3end_score(gmap5) > cutoff_level_5 + gmap_allowance) {
- debug13(printf("Score is only %d vs cutoff_level_5 %d\n",Stage3end_score(gmap5),cutoff_level_5));
- Stage3end_free(&gmap5);
- } else if (redo_for_sense_p == true && Stage3end_sensedir(hit5) == 0) {
- debug13(printf("redo_for_sense, so using this one\n"));
- good_gmap5_hits = List_push(good_gmap5_hits,(void *) gmap5);
- } else if (missing_gmap < missing_hit/2) {
- debug13(printf("GMAP with %d matches, %d missing is significantly better than 5' hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap5),missing_gmap,Stage3end_nmatches_posttrim(hit5),missing_hit));
- good_gmap5_hits = List_push(good_gmap5_hits,(void *) gmap5);
- Stage3end_set_improved_by_gmap(hit5);
- } else {
- debug13(printf("GMAP with %d matches, %d missing is not significantly better than 5' hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap5),missing_gmap,Stage3end_nmatches_posttrim(hit5),missing_hit));
- Stage3end_free(&gmap5);
- }
+ genestrand,/*first_read_p*/true)) != NULL) {
+ debug13(missing_hit = querylength5 - Stage3end_nmatches_posttrim(hit5));
+ debug13(missing_gmap = querylength5 - Stage3end_nmatches_posttrim(gmap5));
+ debug13(printf("GMAP %p with %d matches, %d missing compared with original 5' hit with %d matches, %d missing\n",
+ gmap5,Stage3end_nmatches_posttrim(gmap5),missing_gmap,Stage3end_nmatches_posttrim(hit5),missing_hit));
+ gmap5_hits = List_push(gmap5_hits,(void *) gmap5);
+ Stage3end_set_improved_by_gmap(hit5);
}
- List_free(&gmap5_hits);
}
- debug13(printf("**Looking at hit3\n"));
- if (Stage3end_sarrayp(hit3) == true && redo_for_sense_p == false) {
- /* Skip */
- debug13(printf("Skipping hit3 from sarray search\n"));
-
- } else if (Stage3end_hittype(hit3) == GMAP && redo_for_sense_p == false) {
- /* Skip */
- debug13(printf("Skipping hit3 of type GMAP\n"));
-
-#if 0
- /* Don't skip on final align_concordant_with_gmap */
- } else if (Stage3end_hittype(hit3) == TERMINAL) {
- /* Skip */
- debug13(printf("Skipping hit3 of type TERMINAL\n"));
-#endif
+ debug13(printf("Looking at hit3 with nmismatches %d - %d ?<= cutoff_level %d\n",
+ querylength3,Stage3end_nmatches_posttrim(hit3),cutoff_level_3));
- } else if (querylength3 - Stage3end_nmatches_posttrim(hit3) <= user_maxlevel_3) {
+ if (querylength3 - Stage3end_nmatches_posttrim(hit3) <= cutoff_level_3) {
/* Skip */
- debug13(printf("Skipping hit3 with nmismatches %d - %d <= user_maxlevel %d\n",
- querylength3,Stage3end_nmatches_posttrim(hit3),user_maxlevel_3));
+ debug13(printf("Skipping hit3 with nmismatches %d - %d <= cutoff_level %d\n",
+ querylength3,Stage3end_nmatches_posttrim(hit3),cutoff_level_3));
- } else if (expect_concordant_p == false) {
+ } else {
debug13(printf("expect_concordant_p is false, so running GMAP single end on 3'\n"));
- gmap3_hits = align_single_hit_with_gmap(gmap_history_3,hit3,
- /*extend_left_p*/true,/*extend_right_p*/true,
- queryuc_ptr_3,querylength3,query3_lastpos,
+ if ((gmap3 = align_single_hit_with_gmap(hit3,queryuc_ptr_3,querylength3,
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc3,Shortread_invertedp(queryseq3),
#endif
- query3_compress_fwd,query3_compress_rev,
- plus_segments_genestrand_3[genestrand],
- plus_nsegments_genestrand_3[genestrand],
- minus_segments_genestrand_3[genestrand],
- minus_nsegments_genestrand_3[genestrand],
- oligoindices_major,oligoindices_minor,
+ oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_3,genestrand,/*first_read_p*/false);
-
- missing_hit = querylength3 - Stage3end_nmatches_posttrim(hit3);
- for (b = gmap3_hits; b != NULL; b = List_next(b)) {
- gmap3 = (Stage3end_T) List_head(b);
- missing_gmap = querylength3 - Stage3end_nmatches_posttrim(gmap3);
- if (Stage3end_score(gmap3) > cutoff_level_3 + gmap_allowance) {
- debug13(printf("Score is only %d vs cutoff_level_3 %d\n",Stage3end_score(gmap3),cutoff_level_3));
- Stage3end_free(&gmap3);
- } else if (missing_gmap < missing_hit/2) {
- debug13(printf("GMAP with %d matches, %d missing is significantly better than 3' hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap3),missing_gmap,Stage3end_nmatches_posttrim(hit3),missing_hit));
- good_gmap3_hits = List_push(good_gmap3_hits,(void *) gmap3);
- Stage3end_set_improved_by_gmap(hit3);
- } else {
- debug13(printf("GMAP with %d matches, %d missing is not significantly better than 3' hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap3),missing_gmap,Stage3end_nmatches_posttrim(hit3),missing_hit));
- Stage3end_free(&gmap3);
- }
- }
- List_free(&gmap3_hits);
-
- } else if ((redo_for_sense_p == true && Stage3end_sensedir(hit3) == 0) ||
- Stage3end_terminal_trim(hit3) > GMAP_TERMINAL_TRIM ||
- Stage3end_contains_known_splicesite(hit3) == true) {
- debug13(printf("To correct hit3 terminal %d or known_splicesite %d, running GMAP on 3' to match with 5' end\n",
- Stage3end_hittype(hit3) == TERMINAL,
- Stage3end_contains_known_splicesite(hit3)));
-
- /* Want high quality because we already have a pretty good answer */
- gmap3_hits = align_halfmapping_with_gmap(gmap_history_3,hit5,/*hit3*/NULL,
- queryseq5,queryseq3,
- queryuc_ptr_3,/*querylength*/querylength3,query3_lastpos,
-#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc3,Shortread_invertedp(queryseq3),
-#endif
- query3_compress_fwd,query3_compress_rev,
- plus_segments_genestrand_3[genestrand],
- plus_nsegments_genestrand_3[genestrand],
- minus_segments_genestrand_3[genestrand],
- minus_nsegments_genestrand_3[genestrand],
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- pairmax,shortsplicedist,user_maxlevel_3,genestrand,
- /*first_read_p*/false);
-
- missing_hit = querylength3 - Stage3end_nmatches_posttrim(hit3);
- for (b = gmap3_hits; b != NULL; b = List_next(b)) {
- gmap3 = (Stage3end_T) List_head(b);
- missing_gmap = querylength3 - Stage3end_nmatches_posttrim(gmap3);
- if (Stage3end_score(gmap3) > cutoff_level_3 + gmap_allowance) {
- debug13(printf("Score is only %d vs cutoff_level_3 %d\n",Stage3end_score(gmap3),cutoff_level_3));
- Stage3end_free(&gmap3);
- } else if (redo_for_sense_p == true && Stage3end_sensedir(hit3) == 0) {
- debug13(printf("redo_for_sense, so using this one\n"));
- good_gmap3_hits = List_push(good_gmap3_hits,(void *) gmap3);
- } else if (missing_gmap < missing_hit/2) {
- debug13(printf("GMAP with %d matches, %d missing is significantly better than 3' hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap3),missing_gmap,Stage3end_nmatches_posttrim(hit3),missing_hit));
- good_gmap3_hits = List_push(good_gmap3_hits,(void *) gmap3);
- Stage3end_set_improved_by_gmap(hit3);
- } else {
- debug13(printf("GMAP with %d matches, %d missing is not significantly better than 3' hit with %d matches, %d missing\n",
- Stage3end_nmatches_posttrim(gmap3),missing_gmap,Stage3end_nmatches_posttrim(hit3),missing_hit));
- Stage3end_free(&gmap3);
- }
+ genestrand,/*first_read_p*/false)) != NULL) {
+ debug13(missing_hit = querylength3 - Stage3end_nmatches_posttrim(hit3));
+ debug13(missing_gmap = querylength3 - Stage3end_nmatches_posttrim(gmap3));
+ debug13(printf("GMAP %p with %d matches, %d missing compared with original 3' hit with %d matches, %d missing\n",
+ gmap3,Stage3end_nmatches_posttrim(gmap3),missing_gmap,Stage3end_nmatches_posttrim(hit3),missing_hit));
+ gmap3_hits = List_push(gmap3_hits,(void *) gmap3);
+ Stage3end_set_improved_by_gmap(hit3);
}
- List_free(&gmap3_hits);
}
- if (good_gmap5_hits != NULL && good_gmap3_hits != NULL) {
+ if (gmap5_hits != NULL && gmap3_hits != NULL) {
replacedp = false;
- for (a = good_gmap5_hits; a != NULL; a = List_next(a)) {
+ for (a = gmap5_hits; a != NULL; a = List_next(a)) {
gmap5 = (Stage3end_T) List_head(a);
- for (b = good_gmap3_hits; b != NULL; b = List_next(b)) {
+ for (b = gmap3_hits; b != NULL; b = List_next(b)) {
gmap3 = (Stage3end_T) List_head(b);
debug13(printf("Imperfect concordant uniq: Double GMAP on hit5 and hit3"));
@@ -15682,7 +17561,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
/*private5p*/true,/*private3p*/true,expect_concordant_p)) == NULL) {
/* Stage3end_free(&gmap3); -- done by Stage3pair_new */
/* Stage3end_free(&gmap5); -- done by Stage3pair_new */
- debug13(printf(" => NULL\n"));
+ debug13(printf(" => NULL, so eliminating\n"));
} else if (replacedp == false) {
/* Convert to gmap-gmap */
@@ -15708,25 +17587,25 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
if (replacedp == true) {
Stage3pair_free(&stage3pair); /* Also frees hit5 and hit3 */
}
- for (a = good_gmap5_hits; a != NULL; a = List_next(a)) {
+ for (a = gmap5_hits; a != NULL; a = List_next(a)) {
gmap5 = (Stage3end_T) List_head(a);
Stage3end_free(&gmap5);
}
- for (b = good_gmap3_hits; b != NULL; b = List_next(b)) {
+ for (b = gmap3_hits; b != NULL; b = List_next(b)) {
gmap3 = (Stage3end_T) List_head(b);
Stage3end_free(&gmap3);
}
- List_free(&good_gmap3_hits);
- List_free(&good_gmap5_hits);
+ List_free(&gmap3_hits);
+ List_free(&gmap5_hits);
} else {
debug13(printf("Have %d GMAP 5' hits and %d GMAP 3' hits\n",
- List_length(good_gmap5_hits),List_length(good_gmap3_hits)));
+ List_length(gmap5_hits),List_length(gmap3_hits)));
/* Handle gmap5 hits */
replacedp = false;
- for (a = good_gmap5_hits; a != NULL; a = List_next(a)) {
+ for (a = gmap5_hits; a != NULL; a = List_next(a)) {
gmap5 = (Stage3end_T) List_head(a);
debug13(printf("Imperfect concordant uniq: Single GMAP on hit5"));
@@ -15762,12 +17641,12 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
Stage3pair_free(&stage3pair);
}
/* Do not free gmap5 objects, since not copied */
- List_free(&good_gmap5_hits);
+ List_free(&gmap5_hits);
/* Handle gmap3 hits */
replacedp = false;
- for (b = good_gmap3_hits; b != NULL; b = List_next(b)) {
+ for (b = gmap3_hits; b != NULL; b = List_next(b)) {
gmap3 = (Stage3end_T) List_head(b);
debug13(printf("Imperfect concordant uniq: Single GMAP on hit3"));
@@ -15803,7 +17682,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
Stage3pair_free(&stage3pair);
}
/* Do not free gmap3 objects, since not copied */
- List_free(&good_gmap3_hits);
+ List_free(&gmap3_hits);
}
}
@@ -15814,14 +17693,27 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
}
+/* Need to have this to resolve asymmetry between plus and minus
+ searches for suffix array. This will invoke deeper methods when
+ necessary. */
static bool
-better_free_end_exists_p (List_T subs, List_T indels, List_T singlesplicing, List_T doublesplicing,
+better_free_end_exists_p (List_T greedy, List_T subs, List_T terminals,
+ List_T indels, List_T singlesplicing, List_T doublesplicing,
int querylength) {
int best_concordant_score = querylength, score;
+ /* SPEED */
+ return false;
+
+ if ((score = Stage3end_best_score_paired(greedy)) < best_concordant_score) {
+ best_concordant_score = score;
+ }
if ((score = Stage3end_best_score_paired(subs)) < best_concordant_score) {
best_concordant_score = score;
}
+ if ((score = Stage3end_best_score_paired(terminals)) < best_concordant_score) {
+ best_concordant_score = score;
+ }
if ((score = Stage3end_best_score_paired(indels)) < best_concordant_score) {
best_concordant_score = score;
}
@@ -15833,9 +17725,15 @@ better_free_end_exists_p (List_T subs, List_T indels, List_T singlesplicing, Lis
}
debug(printf("Best concordant score = %d\n",best_concordant_score));
- if (Stage3end_equiv_score_unpaired_p(subs,best_concordant_score) == true) {
+ if (Stage3end_equiv_score_unpaired_p(greedy,best_concordant_score) == true) {
+ debug(printf("Better or equivalent score found in greedy\n"));
+ return true;
+ } else if (Stage3end_equiv_score_unpaired_p(subs,best_concordant_score) == true) {
debug(printf("Better or equivalent score found in subs\n"));
return true;
+ } else if (Stage3end_equiv_score_unpaired_p(terminals,best_concordant_score) == true) {
+ debug(printf("Better or equivalent score found in terminals\n"));
+ return true;
} else if (Stage3end_equiv_score_unpaired_p(indels,best_concordant_score) == true) {
debug(printf("Better or equivalent score found in indels\n"));
return true;
@@ -15852,15 +17750,42 @@ better_free_end_exists_p (List_T subs, List_T indels, List_T singlesplicing, Lis
-#define HITARRAY_SUBS 0
-#define HITARRAY_INDELS 1
-#define HITARRAY_SINGLESPLICING 2
-#define HITARRAY_DOUBLESPLICING 3
-#define HITARRAY_N 4
+/* Search order for paired-end reads:
+
+ 1. suffix array
+ 2. exact/subs, via spanning set algorithm
+ 3. subs/indels, via complete set algorithm
+ 4. segments -> single splicing
+ 5. segments -> double splicing (currently disabled)
+
+ 6. paired segments -> GMAP via segments
+ 7. distant splicing (needs to be before terminals, or we won't find them)
+ 8. terminals
+
+ 9. if still no concordance: GMAP pairsearch
+
+ in caller: consolidate: does GMAP via substrings
+*/
+
+
+#define HITARRAY_GREEDY 0
+#define HITARRAY_SUBS 1
+#define HITARRAY_INDELS 2
+#define HITARRAY_SINGLESPLICING 3
+#define HITARRAY_DOUBLESPLICING 4
+
+#if 0
+#define HITARRAY_LONGSINGLESPLICING 7
+#define HITARRAY_DISTANTSPLICING 8
+#define HITARRAY_SEGMENTS_GMAP 9
+#define HITARRAY_N 10
+#else
+#define HITARRAY_N 5
+#endif
static List_T
align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *cutoff_level_3,
- List_T *samechr, List_T *conc_transloc, List_T *with_terminal,
+ List_T *samechr, List_T *conc_transloc,
History_T gmap_history_5, History_T gmap_history_3, List_T *hits5, List_T *hits3, T this5, T this3,
Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
@@ -15881,11 +17806,16 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
List_T hitpairs = NULL, p;
Stage3pair_T newpair;
- List_T gmap5_hits, gmap3_hits, a;
+ List_T halfmapping5, halfmapping3, a;
Stage3end_T hit5, hit3, gmap5, gmap3;
List_T hitarray5[HITARRAY_N], hitarray3[HITARRAY_N];
- List_T subs5 = NULL, indels5 = NULL, ambiguous5 = NULL, singlesplicing5 = NULL, doublesplicing5 = NULL, terminals5 = NULL;
- List_T subs3 = NULL, indels3 = NULL, ambiguous3 = NULL, singlesplicing3 = NULL, doublesplicing3 = NULL, terminals3 = NULL;
+ List_T plus_anchor_segments_5 = NULL, minus_anchor_segments_5 = NULL, plus_anchor_segments_3 = NULL, minus_anchor_segments_3 = NULL;
+ List_T greedy5 = NULL, subs5 = NULL, terminals5 = NULL,
+ indels5 = NULL, ambiguous5 = NULL, singlesplicing5 = NULL, doublesplicing5 = NULL,
+ distantsplicing5 = NULL, gmap5_hits = NULL;
+ List_T greedy3 = NULL, subs3 = NULL, terminals3 = NULL,
+ indels3 = NULL, ambiguous3 = NULL, singlesplicing3 = NULL, doublesplicing3 = NULL,
+ distantsplicing3 = NULL, gmap3_hits = NULL;
List_T longsinglesplicing5 = NULL, longsinglesplicing3 = NULL;
int nmisses_allowed_sarray_5, nmisses_allowed_sarray_3;
int ignore_found_score, done_level_5, done_level_3, opt_level, fast_level_5, fast_level_3,
@@ -15897,7 +17827,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
List_T *donors_plus_3, *antidonors_plus_3, *acceptors_plus_3, *antiacceptors_plus_3,
*donors_minus_3, *antidonors_minus_3, *acceptors_minus_3, *antiacceptors_minus_3;
- bool spanningset5p, spanningset3p, completeset5p, completeset3p;
+ bool spanningset5p, spanningset3p, completeset5p, completeset3p, gmap5p, gmap3p;
bool did_alignment_p, did_singlesplicing5_p, did_singlesplicing3_p;
bool any_omitted_p_5, any_omitted_p_3;
Floors_T floors5, floors3;
@@ -15922,7 +17852,6 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
*samechr = (List_T) NULL;
*conc_transloc = (List_T) NULL;
- *with_terminal = (List_T) NULL;
*abort_pairing_p = false;
/* For paired-end alignment, ignore found_scores from single-end
@@ -16004,112 +17933,82 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
#ifndef LARGE_GENOMES
if (use_only_sarray_p == true) {
- Sarray_search_greedy(&(*cutoff_level_5),&subs5,&indels5,&ambiguous5,&singlesplicing5,&doublesplicing5,
- queryuc_ptr_5,queryrc5,querylength5,query5_compress_fwd,query5_compress_rev,
- nmisses_allowed_sarray_5,genestrand,/*first_read_p*/true);
- singlesplicing5 = Splice_group_by_segmenti(&ignore_found_score,singlesplicing5,&ambiguous5,querylength5,
- /*first_read_p*/true,/*sarrayp*/true);
- singlesplicing5 = Splice_group_by_segmentj(&ignore_found_score,singlesplicing5,&ambiguous5,querylength5,
- /*first_read_p*/true,/*sarrayp*/true);
- singlesplicing5 = List_append(singlesplicing5,ambiguous5);
-
-
- Sarray_search_greedy(&(*cutoff_level_3),&subs3,&indels3,&ambiguous3,&singlesplicing3,&doublesplicing3,
- queryuc_ptr_3,queryrc3,querylength3,query3_compress_fwd,query3_compress_rev,
- nmisses_allowed_sarray_3,genestrand,/*first_read_p*/false);
- singlesplicing3 = Splice_group_by_segmenti(&ignore_found_score,singlesplicing3,&ambiguous3,querylength3,
- /*first_read_p*/false,/*sarrayp*/true);
- singlesplicing3 = Splice_group_by_segmentj(&ignore_found_score,singlesplicing3,&ambiguous3,querylength3,
- /*first_read_p*/false,/*sarrayp*/true);
- singlesplicing3 = List_append(singlesplicing3,ambiguous3);
+ *hits5 = Sarray_search_greedy(&(*cutoff_level_5),
+ queryuc_ptr_5,queryrc5,querylength5,query5_compress_fwd,query5_compress_rev,maxpeelback,pairpool,
+ dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
+ nmisses_allowed_sarray_5,genestrand,/*first_read_p*/true);
- /* Need to run Stage3end_remove_duplicates before we append the results together */
- hitarray5[HITARRAY_SUBS] = subs5 = Stage3end_remove_duplicates(subs5);
- hitarray3[HITARRAY_SUBS] = subs3 = Stage3end_remove_duplicates(subs3);
- hitarray5[HITARRAY_INDELS] = indels5 = Stage3end_remove_duplicates(indels5);
- hitarray3[HITARRAY_INDELS] = indels3 = Stage3end_remove_duplicates(indels3);
+ *hits3 = Sarray_search_greedy(&(*cutoff_level_3),
+ queryuc_ptr_3,queryrc3,querylength3,query3_compress_fwd,query3_compress_rev,maxpeelback,pairpool,
+ dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
+ nmisses_allowed_sarray_3,genestrand,/*first_read_p*/false);
- *hits5 = List_append(subs5,List_append(indels5,List_append(singlesplicing5,doublesplicing5)));
- *hits3 = List_append(subs3,List_append(indels3,List_append(singlesplicing3,doublesplicing3)));
+ /* Need to run Stage3end_remove_duplicates before we append the results together */
+ hitarray5[HITARRAY_GREEDY] = *hits5;
+ hitarray3[HITARRAY_GREEDY] = *hits3;
if (*hits5 == NULL || *hits3 == NULL) {
return (List_T) NULL;
} else {
- hitarray5[HITARRAY_SINGLESPLICING] = singlesplicing5;
- hitarray3[HITARRAY_SINGLESPLICING] = singlesplicing3;
- hitarray5[HITARRAY_DOUBLESPLICING] = doublesplicing5;
- hitarray3[HITARRAY_DOUBLESPLICING] = doublesplicing3;
- debug(printf("sarray only: 5' end has %d subs, %d indels, %d single splices, %d double splices\n",
- List_length(subs5),List_length(indels5),List_length(singlesplicing5),List_length(doublesplicing5)));
- debug(printf("sarray only: 3' end has %d subs, %d indels, %d single splices, %d double splices\n",
- List_length(subs3),List_length(indels3),List_length(singlesplicing3),List_length(doublesplicing3)));
-
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
- hitpairs,hitarray5,/*narray5*/HITARRAY_DOUBLESPLICING+1,
- hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
- /*terminals5*/NULL,/*terminals3*/NULL,
+ &(*samechr),&(*conc_transloc),
+ hitpairs,hitarray5,/*narray5*/HITARRAY_GREEDY+1,
+ hitarray3,/*narray3*/HITARRAY_GREEDY+1,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
genestrand);
+ debug(printf("SA> found_score = %d, done_level %d,%d\n",*found_score,done_level_5,done_level_3));
return Stage3pair_remove_circular_alias(hitpairs);
}
+ }
+#endif
- } else if (use_sarray_p == true) {
- /* Replaces spanning set */
- Sarray_search_greedy(&ignore_found_score,&subs5,&indels5,&ambiguous5,&singlesplicing5,&doublesplicing5,
- queryuc_ptr_5,queryrc5,querylength5,query5_compress_fwd,query5_compress_rev,
- nmisses_allowed_sarray_5,genestrand,/*first_read_p*/true);
- singlesplicing5 = Splice_group_by_segmenti(&ignore_found_score,singlesplicing5,&ambiguous5,querylength5,
- /*first_read_p*/true,/*sarrayp*/true);
- singlesplicing5 = Splice_group_by_segmentj(&ignore_found_score,singlesplicing5,&ambiguous5,querylength5,
- /*first_read_p*/true,/*sarrayp*/true);
- singlesplicing5 = List_append(singlesplicing5,ambiguous5);
-
-
- Sarray_search_greedy(&ignore_found_score,&subs3,&indels3,&ambiguous3,&singlesplicing3,&doublesplicing3,
- queryuc_ptr_3,queryrc3,querylength3,query3_compress_fwd,query3_compress_rev,
- nmisses_allowed_sarray_3,genestrand,/*first_read_p*/false);
- singlesplicing3 = Splice_group_by_segmenti(&ignore_found_score,singlesplicing3,&ambiguous3,querylength3,
- /*first_read_p*/false,/*sarrayp*/true);
- singlesplicing3 = Splice_group_by_segmentj(&ignore_found_score,singlesplicing3,&ambiguous3,querylength3,
- /*first_read_p*/false,/*sarrayp*/true);
- singlesplicing3 = List_append(singlesplicing3,ambiguous3);
-
-
- hitarray5[HITARRAY_SUBS] = subs5 = Stage3end_remove_duplicates(subs5);
- hitarray3[HITARRAY_SUBS] = subs3 = Stage3end_remove_duplicates(subs3);
- hitarray5[HITARRAY_INDELS] = indels5 = Stage3end_remove_duplicates(indels5);
- hitarray3[HITARRAY_INDELS] = indels3 = Stage3end_remove_duplicates(indels3);
- hitarray5[HITARRAY_SINGLESPLICING] = singlesplicing5;
- hitarray3[HITARRAY_SINGLESPLICING] = singlesplicing3;
- hitarray5[HITARRAY_DOUBLESPLICING] = doublesplicing5;
- hitarray3[HITARRAY_DOUBLESPLICING] = doublesplicing3;
- debug(printf("sarray initial: 5' end has %d subs, %d indels, %d single splices, %d double splices\n",
- List_length(subs5),List_length(indels5),List_length(singlesplicing5),List_length(doublesplicing5)));
- debug(printf("sarray initial: 3' end has %d subs, %d indels, %d single splices, %d double splices\n",
- List_length(subs3),List_length(indels3),List_length(singlesplicing3),List_length(doublesplicing3)));
-
+ /* Search 1: Suffix array */
+ completeset5p = completeset3p = true;
+#ifdef LARGE_GENOMES
+ spanningset5p = spanningset3p = true;
+#else
+ if (use_sarray_p == false) {
+ spanningset5p = spanningset3p = true;
+ } else {
+ spanningset5p = spanningset3p = false; /* By default, suffix array search replaces spanning set */
+
+ debug(printf("Trying suffix array on 5' end\n"));
+ greedy5 = Sarray_search_greedy(&ignore_found_score,
+ queryuc_ptr_5,queryrc5,querylength5,query5_compress_fwd,query5_compress_rev,maxpeelback,pairpool,
+ dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
+ nmisses_allowed_sarray_5,genestrand,/*first_read_p*/true);
+
+ debug(printf("Trying suffix array on 3' end\n"));
+ greedy3 = Sarray_search_greedy(&ignore_found_score,
+ queryuc_ptr_3,queryrc3,querylength3,query3_compress_fwd,query3_compress_rev,maxpeelback,pairpool,
+ dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
+ nmisses_allowed_sarray_3,genestrand,/*first_read_p*/false);
+
+ hitarray5[HITARRAY_GREEDY] = greedy5;
+ hitarray3[HITARRAY_GREEDY] = greedy3;
+ debug(printf("sarray initial: 5' end has %d greedy\n",List_length(greedy5)));
+ debug(printf("sarray initial: 3' end has %d greedy\n",List_length(greedy3)));
+
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
- hitpairs,hitarray5,/*narray5*/HITARRAY_DOUBLESPLICING+1,
- hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
- /*terminals5*/NULL,/*terminals3*/NULL,
+ &(*samechr),&(*conc_transloc),
+ hitpairs,hitarray5,/*narray5*/HITARRAY_GREEDY+1,
+ hitarray3,/*narray3*/HITARRAY_GREEDY+1,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
genestrand);
-
+
debug(printf("After pairing sarray, found %d concordant, %d samechr, found_score %d\n",
nconcordant,nsamechr,*found_score));
if (*abort_pairing_p == true) {
- *hits5 = subs5;
- *hits3 = subs3;
+ *hits5 = greedy5;
+ *hits3 = greedy3;
hitpairs = Stage3pair_remove_circular_alias(hitpairs);
#if 0
hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
@@ -16124,57 +18023,70 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
if ((done_level_3 = opt_level + subopt_levels) > user_maxlevel_3) {
done_level_3 = user_maxlevel_3;
}
- debug(printf("SA> found_score = %d, opt_level %d, done_level %d,%d\n",*found_score,opt_level,done_level_5,done_level_3));
- }
- nhits5 = List_length(subs5);
- nhits3 = List_length(subs3);
- }
-#endif
-
- if (nconcordant == 0) {
- spanningset5p = spanningset3p = true;
- } else {
- spanningset5p = spanningset3p = false;
- if (better_free_end_exists_p(subs5,indels5,singlesplicing5,doublesplicing5,querylength5) == true) {
- spanningset3p = true; /* Do search on other end */
+ debug(printf("SA> found_score = %d, opt_level %d, done_level %d,%d\n",*found_score,opt_level,done_level_5,done_level_3));
}
- if (better_free_end_exists_p(subs3,indels3,singlesplicing3,doublesplicing3,querylength3) == true) {
- spanningset5p = true; /* Do search on other end */
+ nhits5 = List_length(greedy5);
+ nhits3 = List_length(greedy3);
+
+ debug(printf("nconcordant %d\n",nconcordant));
+ if (nconcordant == 0) {
+ /* Need to have this to compensate for greediness of suffix array algorithm */
+ debug(printf("nconcordant is 0, so we are doing spanningset\n"));
+ spanningset5p = spanningset3p = true;
+ } else if (*found_score >= done_level_5 + done_level_3) {
+ debug(printf("found_score %d >= done_level_5 %d + done_level_3 %d,, so we are doing spanningset\n",
+ *found_score,done_level_5,done_level_3));
+ spanningset5p = spanningset3p = true;
}
}
+#endif
- debug(printf("After sarray, found_score = %d\n",*found_score));
+ /* Search 2: Exact/subs via spanning set algorithm */
if (spanningset5p == true || spanningset3p == true) {
/* 1A. Exact. Requires compress if cmet or genomealt. Creates and uses spanning set. */
debug(printf("Performing spanning set with found_score %d\n",*found_score));
mismatch_level_5 = 0;
- if (allvalidp5 == false) {
- debug(printf("Not all oligos in 5' end are valid, so cannot perform spanning set\n"));
- fast_level_5 = -1;
- } else if (spanningset5p == true) {
- debug(printf("fast_level_5 = %d\n",fast_level_5));
- debug(printf("*** Stage 1. Exact ***\n"));
- ignore_found_score = *found_score;
- subs5 = find_spanning_exact_matches(&ignore_found_score,&nhits5,subs5,this5,genestrand,/*first_read_p*/true,
- querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
- query5_compress_fwd,query5_compress_rev);
- mismatch_level_5 = 1;
+ if (done_level_5 == 0 && snpp == false) {
+ debug(printf("Suffix array already found exact matches for 5' end and no SNPs, so spanning set can't do any better\n"));
+ } else {
+ read_oligos(&allvalidp5,this5,queryuc_ptr_5,querylength5,query5_lastpos,genestrand,
+ /*first_read_p*/true);
+ if (allvalidp5 == false) {
+ debug(printf("Not all oligos in 5' end are valid, so cannot perform spanning set\n"));
+ fast_level_5 = -1;
+ spanningset5p = false;
+ } else if (spanningset5p == true) {
+ debug(printf("fast_level_5 = %d\n",fast_level_5));
+ debug(printf("*** Stage 1. Exact ***\n"));
+ ignore_found_score = *found_score;
+ subs5 = find_spanning_exact_matches(&ignore_found_score,&nhits5,subs5,this5,genestrand,/*first_read_p*/true,
+ querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
+ query5_compress_fwd,query5_compress_rev);
+ mismatch_level_5 = 1;
+ }
}
/* 1B. Exact. Requires compress if cmet or genomealt. Creates and uses spanning set. */
mismatch_level_3 = 0;
- if (allvalidp3 == false) {
- debug(printf("Not all oligos in 3' end are valid, so cannot perform spanning set\n"));
- fast_level_3 = -1;
- } else if (spanningset3p == true) {
- debug(printf("fast_level_3 = %d\n",fast_level_3));
- debug(printf("*** Stage 1. Exact ***\n"));
- ignore_found_score = *found_score;
- subs3 = find_spanning_exact_matches(&ignore_found_score,&nhits3,subs3,this3,genestrand,/*first_read_p*/false,
- querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
- query3_compress_fwd,query3_compress_rev);
- mismatch_level_3 = 1;
+ if (done_level_3 == 0 && snpp == false) {
+ debug(printf("Suffix array already found exact matches for 3' end and no SNPs, so spanning set can't do any better\n"));
+ } else {
+ read_oligos(&allvalidp3,this3,queryuc_ptr_3,querylength3,query3_lastpos,genestrand,
+ /*first_read_p*/false);
+ if (allvalidp3 == false) {
+ debug(printf("Not all oligos in 3' end are valid, so cannot perform spanning set\n"));
+ fast_level_3 = -1;
+ spanningset3p = false;
+ } else if (spanningset3p == true) {
+ debug(printf("fast_level_3 = %d\n",fast_level_3));
+ debug(printf("*** Stage 1. Exact ***\n"));
+ ignore_found_score = *found_score;
+ subs3 = find_spanning_exact_matches(&ignore_found_score,&nhits3,subs3,this3,genestrand,/*first_read_p*/false,
+ querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
+ query3_compress_fwd,query3_compress_rev);
+ mismatch_level_3 = 1;
+ }
}
/* 1. Pairing after exact */
@@ -16182,10 +18094,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray5[HITARRAY_SUBS] = subs5; /* = Stage3end_remove_duplicates(subs5) */;
hitarray3[HITARRAY_SUBS] = subs3; /* = Stage3end_remove_duplicates(subs3) */;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
+ &(*samechr),&(*conc_transloc),
hitpairs,hitarray5,/*narray5*/HITARRAY_SUBS+1,
hitarray3,/*narray3*/HITARRAY_SUBS+1,
- /*terminals5*/NULL,/*terminals3*/NULL,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
@@ -16194,8 +18105,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("After pairing exact, found %d concordant, %d samechr, found_score %d\n",
nconcordant,nsamechr,*found_score));
if (*abort_pairing_p == true) {
- *hits5 = subs5;
- *hits3 = subs3;
+ *hits5 = List_append(greedy5,subs5);
+ *hits3 = List_append(greedy3,subs3);
return hitpairs;
} else {
opt_level = (*found_score < opt_level) ? *found_score : opt_level;
@@ -16211,7 +18122,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
did_alignment_p = false;
/* 2A. One mismatch. Requires spanning set and compress. */
- if (spanningset5p && allvalidp5 && querylength5 >= one_miss_querylength && done_level_5 >= 1) {
+ if (spanningset5p /*&& allvalidp5*/ && querylength5 >= one_miss_querylength && done_level_5 >= 1) {
debug(printf("*** Stage 2A. One miss ***\n"));
did_alignment_p = true;
ignore_found_score = *found_score;
@@ -16221,7 +18132,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
/* 2B. One mismatch. Requires spanning set and compress. */
- if (spanningset3p && allvalidp3 && querylength3 >= one_miss_querylength && done_level_3 >= 1) {
+ if (spanningset3p /*&& allvalidp3*/ && querylength3 >= one_miss_querylength && done_level_3 >= 1) {
debug(printf("*** Stage 2B. One miss ***\n"));
did_alignment_p = true;
ignore_found_score = *found_score;
@@ -16235,10 +18146,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray5[HITARRAY_SUBS] = subs5 /* = Stage3end_remove_duplicates(subs5,queryseq5,queryseq3) */;
hitarray3[HITARRAY_SUBS] = subs3 /* = Stage3end_remove_duplicates(subs3,queryseq5,queryseq3) */;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
+ &(*samechr),&(*conc_transloc),
hitpairs,hitarray5,/*narray5*/HITARRAY_SUBS+1,
hitarray3,/*narray3*/HITARRAY_SUBS+1,
- /*terminals5*/NULL,/*terminals3*/NULL,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
@@ -16270,7 +18180,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
did_alignment_p = false;
/* 3A. Mismatches via spanning set. Requires spanning set and compress. */
- if (spanningset5p && allvalidp5 && done_level_5 >= 2) {
+ if (spanningset5p /*&& allvalidp5*/ && done_level_5 >= 2) {
/* NOTE: Since done_level isn't updated, can do in one batch instead of iteratively */
while (mismatch_level_5 <= fast_level_5 && mismatch_level_5 <= done_level_5) {
debug(printf("*** Stage 3A (level %d). Spanning set mismatches ***\n",mismatch_level_5));
@@ -16284,7 +18194,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
/* 3B. Mismatches via spanning set. Requires spanning set and compress. */
- if (spanningset3p && allvalidp3 && done_level_3 >= 2) {
+ if (spanningset3p /*&& allvalidp3*/ && done_level_3 >= 2) {
/* NOTE: Since done_level isn't updated, can do in one batch instead of iteratively */
while (mismatch_level_3 <= fast_level_3 && mismatch_level_3 <= done_level_3) {
debug(printf("*** Stage 3B (level %d). Spanning set mismatches ***\n",mismatch_level_3));
@@ -16302,10 +18212,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray5[HITARRAY_SUBS] = subs5 /* = Stage3end_remove_duplicates(subs5,queryseq5,queryseq3) */;
hitarray3[HITARRAY_SUBS] = subs3 /* = Stage3end_remove_duplicates(subs3,queryseq5,queryseq3) */;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
+ &(*samechr),&(*conc_transloc),
hitpairs,hitarray5,/*narray5*/HITARRAY_SUBS+1,
hitarray3,/*narray3*/HITARRAY_SUBS+1,
- /*terminals5*/NULL,/*terminals3*/NULL,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
@@ -16314,8 +18223,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("After pairing spanning set, found %d concordant, %d samechr, found_score %d\n",
nconcordant,nsamechr,*found_score));
if (*abort_pairing_p == true) {
- *hits5 = subs5;
- *hits3 = subs3;
+ *hits5 = List_append(greedy5,subs5);
+ *hits3 = List_append(greedy3,subs3);
hitpairs = Stage3pair_remove_circular_alias(hitpairs);
#if 0
hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
@@ -16335,28 +18244,30 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
- if (hitpairs == NULL) {
- completeset5p = completeset3p = true;
- } else {
+ /* Search 3: Subs/indels via complete set algorithm */
+
+ /* 4/5A. Complete set mismatches and indels, omitting frequent oligos */
+ if (*found_score <= done_level_5 + done_level_3) {
+ debug(printf("Test for completeset: false because *found_score %d < done_level_5 %d + done_level_3 %d\n",
+ *found_score,done_level_5,done_level_3));
completeset5p = completeset3p = false;
- if (better_free_end_exists_p(subs5,indels5,singlesplicing5,doublesplicing5,querylength5) == true) {
- completeset3p = true; /* Do search on other end */
+ } else {
+ if (better_free_end_exists_p(greedy5,subs5,terminals5,indels5,singlesplicing5,doublesplicing5,querylength5) == true) {
+ completeset3p = true; /* Do search on other end using complete set algorithm */
}
- if (better_free_end_exists_p(subs3,indels3,singlesplicing3,doublesplicing3,querylength3) == true) {
- completeset5p = true; /* Do search on other end */
+ if (better_free_end_exists_p(greedy3,subs3,terminals3,indels3,singlesplicing3,doublesplicing3,querylength3) == true) {
+ completeset5p = true; /* Do search on other end using complete set algorithm */
}
+ debug(printf("Test for completeset using better_free_end_exists_p: completeset5p %d, completeset3p %d\n",completeset5p,completeset3p));
}
- did_alignment_p = false;
-
- /* 4/5A. Complete set mismatches and indels, omitting frequent oligos */
- if (use_sarray_p == true && completeset5p == false) {
- /* Skip. Suffix array already found something */
- debug(printf("Skipping complete set on 5', because sarray found a hitpair\n"));
- } else if (done_level_5 > fast_level_5 || done_level_5 >= indel_penalty_middle || done_level_5 >= indel_penalty_end) {
- did_alignment_p = true;
+ if (completeset5p == true) {
+ debug(printf("Performing complete set analysis on 5' end\n"));
+ if (this5->read_oligos_p == false) {
+ read_oligos(&allvalidp5,this5,queryuc_ptr_5,querylength5,query5_lastpos,genestrand,
+ /*first_read_p*/true);
+ }
-#if 1
floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
@@ -16364,6 +18275,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
floors5_computed_p = true;
ignore_found_score = *found_score;
complete_set_mm_indels(&ignore_found_score,&segments5_computed_p,
+ &plus_anchor_segments_5,&minus_anchor_segments_5,
&opt_level,&done_level_5,user_maxlevel_5,/*revise_levels_p*/false,
&nhits5,&subs5,&indels5,this5,query5_compress_fwd,query5_compress_rev,
#if defined(DEBUG2) || defined(DEBUG2E)
@@ -16372,70 +18284,17 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
querylength5,query5_lastpos,floors5,indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
fast_level_5,genestrand,/*first_read_p*/true);
-
-#else
- /* Using obsolete masktype */
- if (masktype == MASK_NONE) {
- debug(printf("*** Stage 4A,5A. Complete mm/indels with no masking with done_level %d ***\n",done_level_5));
- ignore_found_score = *found_score;
- complete_set_mm_indels(&ignore_found_score,&segments5_computed_p,
- &any_omitted_p_5,&opt_level,&done_level_5,user_maxlevel_5,/*revise_levels_p*/false,
- &nhits5,&subs5,&indels5,this5,query5_compress_fwd,query5_compress_rev,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryuc_ptr_5,queryrc5,
-#endif
- querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
- indexdb_size_threshold,floors_array,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level_5,/*omit_frequent_p*/false,/*omit_repetitive_p*/false,keep_floors_p,
- genestrand,/*first_read_p*/true);
- } else {
- debug(printf("*** Stage 4A,5A. Complete mm/indels masking frequent oligos with done_level %d ***\n",done_level_5));
- ignore_found_score = *found_score;
- complete_set_mm_indels(&ignore_found_score,&segments5_computed_p,
- &any_omitted_p_5,&opt_level,&done_level_5,user_maxlevel_5,/*revise_levels_p*/false,
- &nhits5,&subs5,&indels5,this5,query5_compress_fwd,query5_compress_rev,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryuc_ptr_5,queryrc5,
-#endif
- querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
- indexdb_size_threshold,floors_array,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level_5,/*omit_frequent_p*/true,
- /*omit_repetitive_p*/(masktype == MASK_REPETITIVE || masktype == MASK_GREEDY_REPETITIVE) ? true : false,
- keep_floors_p,genestrand,/*first_read_p*/true);
- if ((masktype == MASK_GREEDY_FREQUENT || masktype == MASK_GREEDY_REPETITIVE) && subs5 == NULL && indels5 == NULL && any_omitted_p_5 == true) {
- FREE(this->minus_segments_5);
- FREE(this->plus_segments_5);
-
- /* 4/5A. Complete set mismatches and indels, with all oligos */
- debug(printf("*** Stage 4A,5A. Complete mm/indels with no masking with done_level %d ***\n",done_level_5));
- ignore_found_score = *found_score;
- complete_set_mm_indels(&ignore_found_score,&segments5_computed_p,
- &any_omitted_p_5,&opt_level,&done_level_5,user_maxlevel_5,/*revise_levels_p*/false,
- &nhits5,&subs5,&indels5,this5,query5_compress_fwd,query5_compress_rev,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryuc_ptr_5,queryrc5,
-#endif
- querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
- indexdb_size_threshold,floors_array,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level_5,/*omit_frequent_p*/false,/*omit_repetitive_p*/false,keep_floors_p,
- genestrand,/*first_read_p*/true);
- }
- }
-#endif
}
/* 4/5B. Complete set mismatches and indels, omitting frequent oligos */
- if (use_sarray_p == true && completeset3p == false) {
- /* Skip. Suffix array already found something */
- debug(printf("Skipping complete set on 3', because sarray found a hitpair\n"));
+ if (completeset3p == true) {
+ debug(printf("Performing complete set analysis on 3' end\n"));
- } else if (done_level_3 > fast_level_3 || done_level_3 >= indel_penalty_middle || done_level_3 >= indel_penalty_end) {
- did_alignment_p = true;
+ if (this3->read_oligos_p == false) {
+ read_oligos(&allvalidp3,this3,queryuc_ptr_3,querylength3,query3_lastpos,genestrand,
+ /*first_read_p*/false);
+ }
-#if 1
floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
@@ -16443,6 +18302,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
floors3_computed_p = true;
ignore_found_score = *found_score;
complete_set_mm_indels(&ignore_found_score,&segments3_computed_p,
+ &plus_anchor_segments_3,&minus_anchor_segments_3,
&opt_level,&done_level_3,user_maxlevel_3,/*revise_levels_p*/false,
&nhits3,&subs3,&indels3,this3,query3_compress_fwd,query3_compress_rev,
#if defined(DEBUG2) || defined(DEBUG2E)
@@ -16451,77 +18311,24 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
querylength3,query3_lastpos,floors3,indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
fast_level_3,genestrand,/*first_read_p*/false);
-
-#else
- if (masktype == MASK_NONE) {
- debug(printf("*** Stage 4B,5B. Complete mm/indels with no masking with done_level %d ***\n",done_level_3));
- ignore_found_score = *found_score;
- complete_set_mm_indels(&ignore_found_score,&segments3_computed_p,
- &any_omitted_p_3,&opt_level,&done_level_3,user_maxlevel_3,/*revise_levels_p*/false,
- &nhits3,&subs3,&indels3,this3,query3_compress_fwd,query3_compress_rev,queryuc_ptr_3,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryrc3,
-#endif
- querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
- indexdb_size_threshold,floors_array,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level_3,/*omit_frequent_p*/false,/*omit_repetitive_p*/false,keep_floors_p,
- genestrand,/*first_read_p*/false);
- } else {
- debug(printf("*** Stage 4B,5B. Complete mm/indels masking frequent oligos with done_level %d ***\n",done_level_3));
- ignore_found_score = *found_score;
- complete_set_mm_indels(&ignore_found_score,&segments3_computed_p,
- &any_omitted_p_3,&opt_level,&done_level_3,user_maxlevel_3,/*revise_levels_p*/false,
- &nhits3,&subs3,&indels3,this3,query3_compress_fwd,query3_compress_rev,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryuc_ptr_3,queryrc3,
-#endif
- querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
- indexdb_size_threshold,floors_array,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level_3,/*omit_frequent_p*/true,
- /*omit_repetitive_p*/(masktype == MASK_REPETITIVE || masktype == MASK_GREEDY_REPETITIVE) ? true : false,
- keep_floors_p,genestrand,/*first_read_p*/false);
- if ((masktype == MASK_GREEDY_FREQUENT || masktype == MASK_GREEDY_REPETITIVE) && subs3 == NULL && indels3 == NULL && any_omitted_p_3 == true) {
- FREE(this->minus_segments_3);
- FREE(this->plus_segments_3);
-
- /* 4/5B. Complete set mismatches and indels, with all oligos */
- debug(printf("*** Stage 4B,5B. Complete mm/indels with no masking with done_level %d ***\n",done_level_3));
- ignore_found_score = *found_score;
- complete_set_mm_indels(&ignore_found_score,&segments3_computed_p,
- &any_omitted_p_3,&opt_level,&done_level_3,user_maxlevel_3,/*revise_levels_p*/false,
- &nhits3,&subs3,&indels3,this3,query3_compress_fwd,query3_compress_rev,
-#if defined(DEBUG2) || defined(DEBUG2E)
- queryuc_ptr_3,queryrc3,
-#endif
- querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
- indexdb_size_threshold,floors_array,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- fast_level_3,/*omit_frequent_p*/false,/*omit_repetitive_p*/false,keep_floors_p,
- genestrand,/*first_read_p*/false);
- }
- }
-#endif
}
- debug(printf("complete: 5' end has %d subs, %d indels, %d single splices, %d double splices\n",
- List_length(subs5),List_length(indels5),List_length(singlesplicing5),List_length(doublesplicing5)));
- debug(printf("complete: 3' end has %d subs, %d indels, %d single splices, %d double splices\n",
- List_length(subs3),List_length(indels3),List_length(singlesplicing3),List_length(doublesplicing3)));
+ debug(printf("complete: 5' end has %d subs, %d indels, %d single splices\n",
+ List_length(subs5),List_length(indels5),List_length(singlesplicing5)));
+ debug(printf("complete: 3' end has %d subs, %d indels, %d single splices\n",
+ List_length(subs3),List_length(indels3),List_length(singlesplicing3)));
- if (did_alignment_p == true) {
+ if (completeset5p == true || completeset3p == true) {
/* 4/5. Pairing after complete set subs and indels */
debug(printf("Starting pairing of 4 and 5\n"));
- hitarray5[HITARRAY_SUBS] = subs5 /* = Stage3end_remove_duplicates(subs5,queryseq5,queryseq3) */;
- hitarray5[HITARRAY_INDELS] = indels5 /* = Stage3end_remove_duplicates(indels5,queryseq5,queryseq3) */;
- hitarray3[HITARRAY_SUBS] = subs3 /* = Stage3end_remove_duplicates(subs3,queryseq5,queryseq3) */;
- hitarray3[HITARRAY_INDELS] = indels3 /* = Stage3end_remove_duplicates(indels3,queryseq5,queryseq3) */;
+ hitarray5[HITARRAY_SUBS] = subs5;
+ hitarray5[HITARRAY_INDELS] = indels5;
+ hitarray3[HITARRAY_SUBS] = subs3;
+ hitarray3[HITARRAY_INDELS] = indels3;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
+ &(*samechr),&(*conc_transloc),
hitpairs,hitarray5,/*narray5*/HITARRAY_INDELS+1,
hitarray3,/*narray3*/HITARRAY_INDELS+1,
- /*terminals5*/NULL,/*terminals3*/NULL,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
@@ -16530,8 +18337,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("After pairing complete set mismatches and indels, found %d concordant, %d nsamechr, found_score %d\n",
nconcordant,nsamechr,*found_score));
if (*abort_pairing_p == true) {
- *hits5 = List_append(subs5,indels5);
- *hits3 = List_append(subs3,indels3);
+ *hits5 = List_append(greedy5,List_append(subs5,indels5));
+ *hits3 = List_append(greedy3,List_append(subs3,indels3));
#if 0
hitpairs = Stage3pair_remove_circular_alias(hitpairs);
#endif
@@ -16547,23 +18354,21 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
debug(printf("4/5> found_score = %d, opt_level %d, done_level %d,%d\n",*found_score,opt_level,done_level_5,done_level_3));
}
- }
- if (hitpairs == NULL) {
- completeset5p = completeset3p = true;
- } else {
- completeset5p = completeset3p = false;
- if (better_free_end_exists_p(subs5,indels5,singlesplicing5,doublesplicing5,querylength5) == true) {
- completeset3p = true; /* Do search on other end */
- }
- if (better_free_end_exists_p(subs3,indels3,singlesplicing3,doublesplicing3,querylength3) == true) {
- completeset5p = true; /* Do search on other end */
+ if (*found_score <= done_level_5 + done_level_3) {
+ debug(printf("Test for completeset: false because *found_score %d <done_level_5 %d + done_level_3 %d\n",
+ *found_score,done_level_5,done_level_3));
+ completeset5p = completeset3p = false;
}
}
+
+ /* Search 4: Segments -> single splicing */
+
/* 6/7/8. Local splicing. Requires compress and all positions fetched. */
/* Subtract 1 from done_level for previous hits */
did_singlesplicing5_p = false;
+ /* SPEED: For more hits, turn off first branch */
if (use_sarray_p == true && completeset5p == false) {
/* Skip. Suffix array already found something */
debug(printf("Skipping complete set on 5', because sarray found a hitpair\n"));
@@ -16584,22 +18389,24 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
if (segments5_computed_p == false) {
- this5->plus_segments = identify_all_segments(&this5->plus_nsegments,&this5->plus_spliceable,&this5->plus_nspliceable,
+ this5->plus_segments = identify_all_segments(&this5->plus_nsegments,&plus_anchor_segments_5,
+ &this5->plus_spliceable,&this5->plus_nspliceable,
#ifdef LARGE_GENOMES
this5->plus_positions_high,this5->plus_positions_low,
#else
this5->plus_positions,
#endif
this5->plus_npositions,this5->omitted,querylength5,query5_lastpos,floors5,
- /*plusp*/true);
- this5->minus_segments = identify_all_segments(&this5->minus_nsegments,&this5->minus_spliceable,&this5->minus_nspliceable,
+ /*max_mismatches_allowed*/done_level_5,/*plusp*/true);
+ this5->minus_segments = identify_all_segments(&this5->minus_nsegments,&minus_anchor_segments_5,
+ &this5->minus_spliceable,&this5->minus_nspliceable,
#ifdef LARGE_GENOMES
this5->minus_positions_high,this5->minus_positions_low,
#else
this5->minus_positions,
#endif
this5->minus_npositions,this5->omitted,querylength5,query5_lastpos,floors5,
- /*plusp*/false);
+ /*max_mismatches_allowed*/done_level_5,/*plusp*/false);
segments5_computed_p = true;
}
@@ -16616,6 +18423,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
did_singlesplicing3_p = false;
+ /* SPEED: For more hits, turn off first branch */
if (use_sarray_p == true && completeset3p == false) {
/* Skip. Suffix array already found something */
debug(printf("Skipping complete set on 3', because sarray found a hitpair\n"));
@@ -16635,22 +18443,24 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
if (segments3_computed_p == false) {
- this3->plus_segments = identify_all_segments(&this3->plus_nsegments,&this3->plus_spliceable,&this3->plus_nspliceable,
+ this3->plus_segments = identify_all_segments(&this3->plus_nsegments,&plus_anchor_segments_3,
+ &this3->plus_spliceable,&this3->plus_nspliceable,
#ifdef LARGE_GENOMES
this3->plus_positions_high,this3->plus_positions_low,
#else
this3->plus_positions,
#endif
this3->plus_npositions,this3->omitted,querylength3,query3_lastpos,floors3,
- /*plusp*/true);
- this3->minus_segments = identify_all_segments(&this3->minus_nsegments,&this3->minus_spliceable,&this3->minus_nspliceable,
+ /*max_mismatches_allowed*/done_level_3,/*plusp*/true);
+ this3->minus_segments = identify_all_segments(&this3->minus_nsegments,&minus_anchor_segments_3,
+ &this3->minus_spliceable,&this3->minus_nspliceable,
#ifdef LARGE_GENOMES
this3->minus_positions_high,this3->minus_positions_low,
#else
this3->minus_positions,
#endif
this3->minus_npositions,this3->omitted,querylength3,query3_lastpos,floors3,
- /*plusp*/false);
+ /*max_mismatches_allowed*/done_level_3,/*plusp*/false);
segments3_computed_p = true;
}
@@ -16673,10 +18483,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
hitarray3[HITARRAY_SINGLESPLICING] = singlesplicing3;
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
+ &(*samechr),&(*conc_transloc),
hitpairs,hitarray5,/*narray5*/HITARRAY_SINGLESPLICING+1,
hitarray3,/*narray3*/HITARRAY_SINGLESPLICING+1,
- /*terminals5*/NULL,/*terminals3*/NULL,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
@@ -16691,8 +18500,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
if (alloc_floors_p_3 == true) {
Floors_free(&floors3);
}
- *hits5 = List_append(subs5,List_append(indels5,singlesplicing5));
- *hits3 = List_append(subs3,List_append(indels3,singlesplicing3));
+ *hits5 = List_append(greedy5,List_append(subs5,List_append(indels5,singlesplicing5)));
+ *hits3 = List_append(greedy3,List_append(subs3,List_append(indels3,singlesplicing3)));
#if 0
hitpairs = Stage3pair_remove_circular_alias(hitpairs);
#endif
@@ -16711,7 +18520,11 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
*found_score,opt_level,done_level_5,done_level_3));
}
- /* 7. Double splicing */
+
+ /* Search 5: Segments -> double splicing */
+#ifdef PERFORM_DOUBLESPLICING
+
+ /* 7. Double splicing. Probably found instead by segment-to-GMAP algorithm */
if (done_level_5 >= localsplicing_penalty) {
debug(printf("*** Stage 7A. Double splicing masking frequent oligos with done_level %d ***\n",done_level_5));
if (floors5_computed_p == false) {
@@ -16757,10 +18570,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("Starting Stage3_pair_up_concordant\n"));
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
+ &(*samechr),&(*conc_transloc),
hitpairs,hitarray5,/*narray5*/HITARRAY_DOUBLESPLICING+1,
hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
- /*terminals5*/NULL,/*terminals3*/NULL,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
@@ -16775,8 +18587,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
if (alloc_floors_p_3 == true) {
Floors_free(&floors3);
}
- *hits5 = List_append(subs5,List_append(indels5,List_append(singlesplicing5,doublesplicing5)));
- *hits3 = List_append(subs3,List_append(indels3,List_append(singlesplicing3,doublesplicing3)));
+ *hits5 = List_append(greedy5,List_append(subs5,List_append(indels5,List_append(singlesplicing5,doublesplicing5))));
+ *hits3 = List_append(greedy3,List_append(subs3,List_append(indels3,List_append(singlesplicing3,doublesplicing3))));
hitpairs = Stage3pair_remove_circular_alias(hitpairs);
#if 0
hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
@@ -16794,357 +18606,156 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("Pairing after 7A and 7B> found_score = %d, opt_level %d, done_level %d,%d\n",
*found_score,opt_level,done_level_5,done_level_3));
}
+#endif
+ }
- alloc5p = false;
- if (knownsplicingp == true && done_level_5 >= localsplicing_penalty) {
- /* Want >= and not > to give better results. Negligible effect on speed. */
- /* 8A. Shortend splicing */
- max_splice_mismatches_5 = done_level_5 - localsplicing_penalty;
-
- alloc5p = true;
- donors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
- antidonors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
- acceptors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
- antiacceptors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
- donors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
- antidonors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
- acceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
- antiacceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
-
- find_spliceends_shortend(&donors_plus_5,&antidonors_plus_5,&acceptors_plus_5,&antiacceptors_plus_5,
- this5->plus_segments,this5->plus_nsegments,
-#ifdef DEBUG4E
- queryuc_ptr_5,
+ debug(printf("nconcordant = %d. found_score %d, trigger_score %d, done level %d + %d\n",
+ nconcordant,*found_score,trigger_score_for_gmap,done_level_5,done_level_3));
+
+ *hits5 = List_append(greedy5,List_append(subs5,List_append(indels5,List_append(singlesplicing5,doublesplicing5))));
+ *hits3 = List_append(greedy3,List_append(subs3,List_append(indels3,List_append(singlesplicing3,doublesplicing3))));
+
+
+ /* Search 6: Paired segments -> GMAP via segments */
+
+ gmap5p = gmap3p = true;
+ if (gmap_segments_p == false) {
+ debug(printf("gmap_segments_p is false, so setting gmap5p and gmap3p false\n"));
+ gmap5p = gmap3p = false;
+ } else if (*abort_pairing_p == true) {
+ debug(printf("abort_pairing_p is true, so setting gmap5p and gmap3p false\n"));
+ gmap5p = gmap3p = false;
+ } else if (nconcordant > 0) {
+ /* Rely upon GMAP improvement instead */
+ debug(printf("nconcordant == 0, so setting gmap5p and gmap3p false\n"));
+ gmap5p = gmap3p = false;
+ } else if (*found_score < trigger_score_for_gmap) {
+ debug(printf("found_score %d < trigger_score_for_gmap %d, so setting gmap5p and gmap3p false\n",
+ *found_score,trigger_score_for_gmap));
+ gmap5p = gmap3p = false;
+ } else if (*found_score < done_level_5 + done_level_3) {
+ debug(printf("found_score %d < done_level_5 %d + done_level_3 %d, so setting gmap5p and gmap3p false\n",
+ *found_score,done_level_5,done_level_3));
+ gmap5p = gmap3p = false;
+ }
+
+ if (gmap5p == true || gmap3p == true) {
+ debug(printf("***Trying to pair up segments***\n"));
+ pair_up_anchor_segments(plus_anchor_segments_5,minus_anchor_segments_5,
+ plus_anchor_segments_3,minus_anchor_segments_3,
+ pairmax);
+
+ if (gmap5p == true) {
+ gmap5_hits = convert_plus_segments_to_gmap(gmap_history_5,/*hits*/NULL,
+ Shortread_accession(queryseq5),
+ queryuc_ptr_5,querylength5,query5_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ queryrc5,Shortread_invertedp(queryseq5),
#endif
- floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_fwd,
- /*max_mismatches_allowed*/max_splice_mismatches_5,/*plusp*/true,genestrand,
- /*first_read_p*/true);
-
- find_spliceends_shortend(&antidonors_minus_5,&donors_minus_5,&antiacceptors_minus_5,&acceptors_minus_5,
- this5->minus_segments,this5->minus_nsegments,
-#ifdef DEBUG4E
- /*queryptr*/queryrc5,
+ query5_compress_fwd,query5_compress_rev,
+ plus_anchor_segments_5,this5->plus_segments,this5->plus_nsegments,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ user_maxlevel_5,genestrand,/*first_read_p*/true,
+ /*require_pairing_p*/true);
+ gmap5_hits = convert_minus_segments_to_gmap(gmap_history_5,/*hits*/gmap5_hits,
+ Shortread_accession(queryseq5),
+ queryuc_ptr_5,querylength5,query5_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ queryrc5,Shortread_invertedp(queryseq5),
#endif
- floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_rev,
- /*max_mismatches_allowed*/max_splice_mismatches_5,/*plusp*/false,genestrand,
- /*first_read_p*/true);
-
- ignore_found_score = *found_score;
- singlesplicing5 = find_splicepairs_shortend(&ignore_found_score,/*hits*/singlesplicing5,
- donors_plus_5,antidonors_plus_5,acceptors_plus_5,antiacceptors_plus_5,
- donors_minus_5,antidonors_minus_5,acceptors_minus_5,antiacceptors_minus_5,
query5_compress_fwd,query5_compress_rev,
- queryuc_ptr_5,queryrc5,min_shortend,localsplicing_penalty,
- /*max_mismatches_allowed*/max_splice_mismatches_5,querylength5,
- /*pairedp*/true,/*first_read_p*/true,genestrand);
+ minus_anchor_segments_5,this5->minus_segments,this5->minus_nsegments,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ user_maxlevel_5,genestrand,/*first_read_p*/true,
+ /*require_pairing_p*/true);
+#if 0
+ /* Note: cannot use hitarray after we have removed overlapping alignments. Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
+ hitarray5[HITARRAY_SEGMENTS_GMAP] = gmap5_hits;
+#else
+ *hits5 = List_append(*hits5,gmap5_hits);
+#endif
}
-
- alloc3p = false;
- if (knownsplicingp == true && done_level_3 >= localsplicing_penalty) {
- /* Want >= and not > to give better results. Negligible effect on speed. */
- /* 8B. Short-Overlap splicing */
- max_splice_mismatches_3 = done_level_3 - localsplicing_penalty;
-
- alloc3p = true;
- donors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
- antidonors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
- acceptors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
- antiacceptors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
- donors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
- antidonors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
- acceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
- antiacceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
-
- find_spliceends_shortend(&donors_plus_3,&antidonors_plus_3,&acceptors_plus_3,&antiacceptors_plus_3,
- this3->plus_segments,this3->plus_nsegments,
-#ifdef DEBUG4E
- queryuc_ptr_3,
+ if (gmap3p == true) {
+ gmap3_hits = convert_plus_segments_to_gmap(gmap_history_3,/*hits*/NULL,
+ Shortread_accession(queryseq3),
+ queryuc_ptr_3,querylength3,query3_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ queryrc3,Shortread_invertedp(queryseq3),
#endif
- floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_fwd,
- /*max_mismatches_allowed*/max_splice_mismatches_3,/*plusp*/true,genestrand,
- /*first_read_p*/false);
-
- find_spliceends_shortend(&antidonors_minus_3,&donors_minus_3,&antiacceptors_minus_3,&acceptors_minus_3,
- this3->minus_segments,this3->minus_nsegments,
-#ifdef DEBUG4E
- /*queryptr*/queryrc3,
+ query3_compress_fwd,query3_compress_rev,
+ plus_anchor_segments_3,this3->plus_segments,this3->plus_nsegments,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ user_maxlevel_3,genestrand,/*first_read_p*/false,
+ /*require_pairing_p*/true);
+ gmap3_hits = convert_minus_segments_to_gmap(gmap_history_3,/*hits*/gmap3_hits,
+ Shortread_accession(queryseq3),
+ queryuc_ptr_3,querylength3,query3_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ queryrc3,Shortread_invertedp(queryseq3),
#endif
- floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_rev,
- /*max_mismatches_allowed*/max_splice_mismatches_3,/*plusp*/false,genestrand,
- /*first_read_p*/false);
-
- ignore_found_score = *found_score;
- singlesplicing3 = find_splicepairs_shortend(&ignore_found_score,/*hits*/singlesplicing3,
- donors_plus_3,antidonors_plus_3,acceptors_plus_3,antiacceptors_plus_3,
- donors_minus_3,antidonors_minus_3,acceptors_minus_3,antiacceptors_minus_3,
query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_3,queryrc3,min_shortend,localsplicing_penalty,
- /*max_mismatches_allowed*/max_splice_mismatches_3,querylength3,
- /*pairedp*/true,/*first_read_p*/false,genestrand);
- }
-
- if (singlesplicing5 != NULL || singlesplicing3 != NULL) {
- /* 8. Pairing after short-overlaps */
- hitarray5[HITARRAY_SINGLESPLICING] = singlesplicing5 /* = Stage3end_remove_duplicates(singlesplicing5,queryseq5,queryseq3) */;
- hitarray3[HITARRAY_SINGLESPLICING] = singlesplicing3 /* = Stage3end_remove_duplicates(singlesplicing3,queryseq5,queryseq3) */;
- hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
- hitpairs,hitarray5,/*narray5*/HITARRAY_DOUBLESPLICING+1,
- hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
- /*terminals5*/NULL,/*terminals3*/NULL,
- *cutoff_level_5,*cutoff_level_3,subopt_levels,
- splicesites,query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,
- querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
- genestrand);
- debug(printf("After pairing short-overlap splicing, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
- if (*abort_pairing_p == false) {
- opt_level = (*found_score < opt_level) ? *found_score : opt_level;
- if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
- done_level_5 = user_maxlevel_5;
- }
- if ((done_level_3 = opt_level + subopt_levels) > user_maxlevel_3) {
- done_level_3 = user_maxlevel_3;
- }
- debug(printf("Pairing after 8A and 8B> found_score = %d, opt_level %d, done_level %d,%d\n",
- *found_score,opt_level,done_level_5,done_level_3));
- }
- }
-
- if (alloc5p == true) {
- /* Clean up 5 */
- for (i = 0; i <= max_splice_mismatches_5; i++) {
- substringlist_gc(&(donors_plus_5[i]));
- substringlist_gc(&(antidonors_plus_5[i]));
- substringlist_gc(&(acceptors_plus_5[i]));
- substringlist_gc(&(antiacceptors_plus_5[i]));
- substringlist_gc(&(donors_minus_5[i]));
- substringlist_gc(&(antidonors_minus_5[i]));
- substringlist_gc(&(acceptors_minus_5[i]));
- substringlist_gc(&(antiacceptors_minus_5[i]));
- }
- FREEA(donors_plus_5);
- FREEA(antidonors_plus_5);
- FREEA(acceptors_plus_5);
- FREEA(antiacceptors_plus_5);
- FREEA(donors_minus_5);
- FREEA(antidonors_minus_5);
- FREEA(acceptors_minus_5);
- FREEA(antiacceptors_minus_5);
- }
-
- if (alloc3p == true) {
- /* Clean up 3 */
- for (i = 0; i <= max_splice_mismatches_3; i++) {
- substringlist_gc(&(donors_plus_3[i]));
- substringlist_gc(&(antidonors_plus_3[i]));
- substringlist_gc(&(acceptors_plus_3[i]));
- substringlist_gc(&(antiacceptors_plus_3[i]));
- substringlist_gc(&(donors_minus_3[i]));
- substringlist_gc(&(antidonors_minus_3[i]));
- substringlist_gc(&(acceptors_minus_3[i]));
- substringlist_gc(&(antiacceptors_minus_3[i]));
- }
- FREEA(donors_plus_3);
- FREEA(antidonors_plus_3);
- FREEA(acceptors_plus_3);
- FREEA(antiacceptors_plus_3);
- FREEA(donors_minus_3);
- FREEA(antidonors_minus_3);
- FREEA(acceptors_minus_3);
- FREEA(antiacceptors_minus_3);
+ minus_anchor_segments_3,this3->minus_segments,this3->minus_nsegments,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ user_maxlevel_3,genestrand,/*first_read_p*/false,
+ /*require_pairing_p*/true);
+#if 0
+ /* Note: cannot use hitarray after we have removed overlapping alignments. Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
+ hitarray3[HITARRAY_SEGMENTS_GMAP] = gmap3_hits;
+#else
+ *hits3 = List_append(*hits3,gmap3_hits);
+#endif
}
}
- *hits5 = List_append(subs5,List_append(indels5,List_append(singlesplicing5,doublesplicing5)));
- *hits3 = List_append(subs3,List_append(indels3,List_append(singlesplicing3,doublesplicing3)));
- debug(printf("overall: 5' end has %d subs, %d indels, %d single splices, %d double splices\n",
- List_length(subs5),List_length(indels5),List_length(singlesplicing5),List_length(doublesplicing5)));
- debug(printf("overall: 3' end has %d subs, %d indels, %d single splices, %d double splices\n",
- List_length(subs3),List_length(indels3),List_length(singlesplicing3),List_length(doublesplicing3)));
-
- if (gmap_pairsearch_p == true) {
- debug(printf("Test for stage 9. Comparing if found score %d >= trigger_score_for_gmap %d, and if indels are involved\n",
- *found_score,trigger_score_for_gmap));
-
- /* 9A,B. GMAP pairsearch/halfmapping/unpaired */
- /* Our previous test for doing GMAP was if nconcordant == 0, but
- could lead to a false positive concordant match. */
- /* Do not update nconcordant, because poor GMAP alignments can stop search for a distant splice */
-
- /* Relying upon trigger_score_for_gmap can occasionally lead to faulty concordant alignments. However, running it on everything
- speed by half */
- if (*abort_pairing_p == true) {
- /* Don't do GMAP */
- } else if (*found_score >= trigger_score_for_gmap || indels5 != NULL || indels3 != NULL) {
- debug(printf("Stage 9. Found score %d > %d or indels5 or indels3 found. Seeing if GMAP will help on %d + %d results\n",
- *found_score,trigger_score_for_gmap,List_length(*hits5),List_length(*hits3)));
-
- /* Go ahead and resolve overlaps on each end by Stage3end, since
- we cannot do it by Stage3pair, but do not apply optimal
- score */
- debug(printf("Before remove_overlaps of 5' at cutoff level %d: %d hits\n",*cutoff_level_5,List_length(*hits5)));
- *hits5 = Stage3end_sort_bymatches(Stage3end_remove_overlaps(*hits5,/*finalp*/false));
- debug(printf("After remove_overlaps: %d\n",List_length(*hits5)));
-
- debug(printf("Before remove_overlaps of 3' at cutoff level %d: %d hits\n",*cutoff_level_3,List_length(*hits3)));
- *hits3 = Stage3end_sort_bymatches(Stage3end_remove_overlaps(*hits3,/*finalp*/false));
- debug(printf("After remove_overlaps: %d\n",List_length(*hits3)));
-
- /* Previously did pairsearch only if hits were limited, but affected by poor terminals */
- if (1 || List_length(*hits5) <= max_gmap_pairsearch) {
- i = 0;
- best_score_paired = Stage3end_best_score_paired(*hits5);
- debug13(printf("%d hits on 5' end\n",List_length(*hits5)));
- debug13(printf("For pairsearch, running GMAP on 3' end to match with 5' ends with score <= score %d\n",
- best_score_paired));
- for (p = *hits5; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) {
- hit5 = (Stage3end_T) List_head(p);
- if (Stage3end_hittype(hit5) == TRANSLOC_SPLICE) {
- debug13(printf("No GMAP on transloc splice\n"));
- } else if (Stage3end_paired_usedp(hit5) == false && Stage3end_score(hit5) <= best_score_paired) {
- gmap3_hits = align_halfmapping_with_gmap(gmap_history_3,hit5,/*hit3*/NULL,queryseq5,queryseq3,
- queryuc_ptr_3,/*querylength*/querylength3,query3_lastpos,
-#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc3,Shortread_invertedp(queryseq3),
-#endif
- query3_compress_fwd,query3_compress_rev,
- this3->plus_segments,this3->plus_nsegments,this3->minus_segments,this3->minus_nsegments,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- pairmax,shortsplicedist,user_maxlevel_5,genestrand,
- /*first_read_p*/false);
- for (a = gmap3_hits; a != NULL; a = List_next(a)) {
- gmap3 = (Stage3end_T) List_head(a);
- debug13(printf("=> Successful pairsearch GMAP on hit3 with score %d and nmatches %d\n",
- Stage3end_score(gmap3),Stage3end_nmatches_posttrim(gmap3)));
-
- if (Stage3end_score(gmap3) > *cutoff_level_3 + gmap_allowance) {
- /* nsalvage += 1; */
- debug13(printf("Score is only %d vs cutoff level %d\n",Stage3end_score(gmap3),*cutoff_level_3));
- Stage3end_free(&gmap3);
-
- } else if ((newpair = Stage3pair_new(Stage3end_copy(hit5),gmap3,splicesites,
- query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,genestrand,
- /*pairtype*/CONCORDANT,localsplicing_penalty,
- /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
- debug13(printf( "newpair is NULL\n"));
- /* Stage3end_free(&gmap3); -- done by Stage3pair_new */
-
- } else if (Stage3end_hittype(hit5) != TERMINAL) {
- if (Stage3end_nmatches_posttrim(gmap3) >= querylength3 - (*cutoff_level_3) &&
- Stage3end_gmap_max_match_length(gmap3) >= querylength3/2) {
- /* Want high standard for nconcordant, since this precludes finding terminals */
- nconcordant += 1;
- debug13(printf("High quality (nmatches %d >= querylength %d - cutoff level %d) => nconcordant %d\n",
- Stage3end_nmatches_posttrim(gmap3),querylength3,*cutoff_level_3,nconcordant));
- }
- hitpairs = List_push(hitpairs,(void *) newpair);
- } else if (Stage3end_trimlength(hit5) < reject_trimlength) {
- if (Stage3end_nmatches_posttrim(gmap3) >= querylength3 - (*cutoff_level_3) &&
- Stage3end_gmap_max_match_length(gmap3) >= querylength3/2) {
- /* Want high standard for nconcordant, since this precludes finding terminals */
- nconcordant += 1;
- debug13(printf("High quality (nmatches %d >= querylength %d - cutoff level %d) => nconcordant %d\n",
- Stage3end_nmatches_posttrim(gmap3),querylength3,*cutoff_level_3,nconcordant));
- }
- *with_terminal = List_push(*with_terminal,(void *) newpair);
- } else {
- /* Stage3end_free(&gmap3); */
- Stage3pair_free(&newpair);
- }
- }
- List_free(&gmap3_hits);
- i++;
- }
- }
- }
-
- /* Previously did pairsearch only if hits were limited, but affected by poor terminals */
- if (1 || List_length(*hits3) <= max_gmap_pairsearch) {
- i = 0;
- best_score_paired = Stage3end_best_score_paired(*hits3);
- debug13(printf("%d hits on 3' end\n",List_length(*hits3)));
- debug13(printf("For pairsearch, running GMAP on 5' end to match with 3' ends with score <= score %d\n",
- best_score_paired));
- for (p = *hits3; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) {
- hit3 = (Stage3end_T) List_head(p);
- if (Stage3end_hittype(hit3) == TRANSLOC_SPLICE) {
- debug13(printf("Not GMAP on transloc splice\n"));
- } else if (Stage3end_paired_usedp(hit3) == false && Stage3end_score(hit3) <= best_score_paired) {
- gmap5_hits = align_halfmapping_with_gmap(gmap_history_5,/*hit5*/NULL,hit3,queryseq5,queryseq3,
- queryuc_ptr_5,/*querylength*/querylength5,query5_lastpos,
-#ifdef END_KNOWNSPLICING_SHORTCUT
- queryrc5,Shortread_invertedp(queryseq5),
-#endif
- query5_compress_fwd,query5_compress_rev,
- this5->plus_segments,this5->plus_nsegments,this5->minus_segments,this5->minus_nsegments,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- pairmax,shortsplicedist,user_maxlevel_5,genestrand,
- /*first_read_p*/true);
- for (a = gmap5_hits; a != NULL; a = List_next(a)) {
- gmap5 = (Stage3end_T) List_head(a);
- debug13(printf("=> Successful pairsearch GMAP on hit5 with score %d and nmatches %d\n",
- Stage3end_score(gmap5),Stage3end_nmatches_posttrim(gmap5)));
-
- /* Stage3end_nmatches_posttrim(gmap5) >= querylength5 - (*cutoff_level_5); */
- if (Stage3end_score(gmap5) > *cutoff_level_5 + gmap_allowance) {
- /* nsalvage += 1; */
- debug13(printf("Score is only %d vs cutoff level %d\n",Stage3end_score(gmap5),*cutoff_level_5));
- Stage3end_free(&gmap5);
-
- } else if ((newpair = Stage3pair_new(gmap5,Stage3end_copy(hit3),splicesites,
- query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,genestrand,
- /*pairtype*/CONCORDANT,localsplicing_penalty,
- /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
- debug13(printf( "newpair is NULL\n"));
- /* Stage3end_free(&gmap5); -- done by Stage3pair_new */
-
- } else if (Stage3end_hittype(hit3) != TERMINAL) {
- if (Stage3end_nmatches_posttrim(gmap5) >= querylength5 - (*cutoff_level_5) &&
- Stage3end_gmap_max_match_length(gmap5) >= querylength5/2) {
- /* Want high standard for nconcordant, since this precludes finding terminals */
- nconcordant += 1;
- debug13(printf("High quality (nmatches %d >= querylength %d - cutoff level %d) => nconcordant %d\n",
- Stage3end_nmatches_posttrim(gmap5),querylength5,*cutoff_level_5,nconcordant));
- }
- hitpairs = List_push(hitpairs,(void *) newpair);
- } else if (Stage3end_trimlength(hit3) < reject_trimlength) {
- if (Stage3end_nmatches_posttrim(gmap5) >= querylength5 - (*cutoff_level_5) &&
- Stage3end_gmap_max_match_length(gmap5) >= querylength5/2) {
- /* Want high standard for nconcordant, since this precludes finding terminals */
- nconcordant += 1;
- debug13(printf("High quality (nmatches %d >= querylength %d - cutoff level %d) => nconcordant %d\n",
- Stage3end_nmatches_posttrim(gmap5),querylength5,*cutoff_level_5,nconcordant));
- }
- *with_terminal = List_push(*with_terminal,(void *) newpair);
- } else {
- /* Stage3end_free(&gmap5); */
- Stage3pair_free(&newpair);
- }
- }
- List_free(&gmap5_hits);
- i++;
- }
- }
+ if (gmap5_hits != NULL || gmap3_hits != NULL) {
+ found_terminals_p = true;
+ debug4t(printf("Running Stage3_pair_up_concordant\n"));
+ /* Note: cannot use hitarray after we have removed overlapping alignments */
+ hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
+ &(*samechr),&(*conc_transloc),
+ hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
+ /*hitarray3*/&(*hits3),/*narray3*/1,
+ *cutoff_level_5,*cutoff_level_3,subopt_levels,
+ splicesites,query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
+ genestrand);
+ debug(printf("11> After pairing GMAP, found %d concordant, %d samechr, found_score %d\n",
+ nconcordant,nsamechr,*found_score));
+ if (*abort_pairing_p == false) {
+ opt_level = (*found_score < opt_level) ? *found_score : opt_level;
+ if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
+ done_level_5 = user_maxlevel_5;
+ }
+ if ((done_level_3 = opt_level + subopt_levels) > user_maxlevel_3) {
+ done_level_3 = user_maxlevel_3;
}
- debug(printf("9> After GMAP pairsearch, found %d concordant\n",nconcordant));
+ debug(printf("Pairing after 11A and 11B> found_score = %d, opt_level %d, done_level %d,%d\n",
+ *found_score,opt_level,done_level_5,done_level_3));
}
}
- /* 10. Distant splicing */
- /* We can allow nconcordant == 0 here, if we don't update
- nconcordant from GMAP hitpairs */
- if ((knownsplicingp || novelsplicingp) && nconcordant == 0 &&
- *abort_pairing_p == false) {
+ /* Search 7: Distant splicing */
+
+ if (nconcordant > 0) {
+ /* Skip search for distant splicing */
+
+ } else if (*abort_pairing_p == true) {
+ /* Skip further searching */
+
+ } else if (knownsplicingp == false && novelsplicingp == false) {
+ /* Find distant splicing for DNA */
+
+ } else {
+ /* Find distant splicing for RNA */
if (done_level_5 >= distantsplicing_penalty) {
/* Want >= and not >, because otherwise distant splicing does not work on 50-bp reads */
/* Want > and not >=, because distant splicing needs to be better than other alternatives */
@@ -17159,29 +18770,37 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
acceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
antiacceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
- /* 10A. Distant splicing */
- debug(printf("Starting find_spliceends (plus)\n"));
- find_spliceends_distant(&donors_plus_5,&antidonors_plus_5,&acceptors_plus_5,&antiacceptors_plus_5,
- this5->plus_segments,this5->plus_nsegments,
+ if (floors5_computed_p == false) {
+ floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
+ querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
+ indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
+ keep_floors_p);
+ floors5_computed_p = true;
+ }
+
+ /* 11A. Distant splicing */
+ debug(printf("Starting find_spliceends (plus) on 5' end with %d anchor segments\n",List_length(plus_anchor_segments_5)));
+ find_spliceends_distant_rna(&donors_plus_5,&antidonors_plus_5,&acceptors_plus_5,&antiacceptors_plus_5,
+ plus_anchor_segments_5,
#ifdef DEBUG4E
- /*queryptr*/queryuc_ptr_5,
+ /*queryptr*/queryuc_ptr_5,
#endif
- floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_fwd,
- max_splice_mismatches_5,/*plusp*/true,genestrand,/*first_read_p*/true);
+ floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_fwd,
+ max_splice_mismatches_5,/*plusp*/true,genestrand,/*first_read_p*/true);
debug(printf("Finished find_spliceends (plus)\n"));
- debug(printf("Starting find_spliceends (minus)\n"));
- find_spliceends_distant(&antidonors_minus_5,&donors_minus_5,&antiacceptors_minus_5,&acceptors_minus_5,
- this5->minus_segments,this5->minus_nsegments,
+ debug(printf("Starting find_spliceends (minus) on 5' end with %d anchor segments\n",List_length(minus_anchor_segments_5)));
+ find_spliceends_distant_rna(&antidonors_minus_5,&donors_minus_5,&antiacceptors_minus_5,&acceptors_minus_5,
+ minus_anchor_segments_5,
#ifdef DEBUG4E
- /*queryptr*/queryrc5,
+ /*queryptr*/queryrc5,
#endif
- floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_rev,
- max_splice_mismatches_5,/*plusp*/false,genestrand,/*first_read_p*/true);
+ floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_rev,
+ max_splice_mismatches_5,/*plusp*/false,genestrand,/*first_read_p*/true);
debug(printf("Finished find_spliceends (minus)\n"));
- /* 10A. Distant splicing */
+ /* 11A. Distant splicing */
nmismatches = 0;
while (longsinglesplicing5 == NULL &&
nmismatches <= max_splice_mismatches_5 /* && nsplicepairs5 < MAXCHIMERAPATHS */) {
@@ -17209,18 +18828,15 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
List_length(antidonors_minus_5[nmismatches]),List_length(antiacceptors_minus_5[nmismatches])));
ignore_found_score = *found_score;
- *hits5 = find_splicepairs_distant(&ignore_found_score,&nsplicepairs5,&longsinglesplicing5,*hits5,
- donors_plus_5,antidonors_plus_5,acceptors_plus_5,antiacceptors_plus_5,
- donors_minus_5,antidonors_minus_5,acceptors_minus_5,antiacceptors_minus_5,
- localsplicing_penalty,distantsplicing_penalty,
- querylength5,nmismatches,/*first_read_p*/true);
+ distantsplicing5 = find_splicepairs_distant_rna(&ignore_found_score,&nsplicepairs5,&longsinglesplicing5,/*hits*/distantsplicing5,
+ donors_plus_5,antidonors_plus_5,acceptors_plus_5,antiacceptors_plus_5,
+ donors_minus_5,antidonors_minus_5,acceptors_minus_5,antiacceptors_minus_5,
+ localsplicing_penalty,distantsplicing_penalty,
+ querylength5,nmismatches,/*first_read_p*/true);
+ debug(printf("Found %d distant splices on 5' end\n",List_length(distantsplicing5)));
nmismatches++;
}
- if (longsinglesplicing5 != NULL) {
- *hits5 = List_append(*hits5,longsinglesplicing5);
- }
-
/* Clean up 5 */
for (i = 0; i <= max_splice_mismatches_5; i++) {
substringlist_gc(&(donors_plus_5[i]));
@@ -17256,28 +18872,36 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
acceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
antiacceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
- /* 10B. Distant splicing */
- debug(printf("Starting find_spliceends (plus)\n"));
- find_spliceends_distant(&donors_plus_3,&antidonors_plus_3,&acceptors_plus_3,&antiacceptors_plus_3,
- this3->plus_segments,this3->plus_nsegments,
+ if (floors3_computed_p == false) {
+ floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
+ querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
+ indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
+ keep_floors_p);
+ floors3_computed_p = true;
+ }
+
+ /* 11B. Distant splicing */
+ debug(printf("Starting find_spliceends (plus) on 3' end with %d anchor segments\n",List_length(plus_anchor_segments_3)));
+ find_spliceends_distant_rna(&donors_plus_3,&antidonors_plus_3,&acceptors_plus_3,&antiacceptors_plus_3,
+ plus_anchor_segments_3,
#ifdef DEBUG4E
- /*queryptr*/queryuc_ptr_3,
+ /*queryptr*/queryuc_ptr_3,
#endif
- floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_fwd,
- max_splice_mismatches_3,/*plusp*/true,genestrand,/*first_read_p*/false);
+ floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_fwd,
+ max_splice_mismatches_3,/*plusp*/true,genestrand,/*first_read_p*/false);
debug(printf("Finished find_spliceends (plus)\n"));
- debug(printf("Starting find_spliceends (minus)\n"));
- find_spliceends_distant(&antidonors_minus_3,&donors_minus_3,&antiacceptors_minus_3,&acceptors_minus_3,
- this3->minus_segments,this3->minus_nsegments,
+ debug(printf("Starting find_spliceends (minus) on 3' end with %d anchor segments\n",List_length(minus_anchor_segments_3)));
+ find_spliceends_distant_rna(&antidonors_minus_3,&donors_minus_3,&antiacceptors_minus_3,&acceptors_minus_3,
+ minus_anchor_segments_3,
#ifdef DEBUG4E
- /*queryptr*/queryrc3,
+ /*queryptr*/queryrc3,
#endif
- floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_rev,
- max_splice_mismatches_3,/*plusp*/false,genestrand,/*first_read_p*/false);
+ floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_rev,
+ max_splice_mismatches_3,/*plusp*/false,genestrand,/*first_read_p*/false);
debug(printf("Finished find_spliceends (minus)\n"));
- /* 10B. Distant splicing */
+ /* 11B. Distant splicing */
nmismatches = 0;
while (longsinglesplicing3 == NULL &&
nmismatches <= max_splice_mismatches_3 /* && nsplicepairs3 < MAXCHIMERAPATHS */) {
@@ -17305,18 +18929,15 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
List_length(antidonors_minus_3[nmismatches]),List_length(antiacceptors_minus_3[nmismatches])));
ignore_found_score = *found_score;
- *hits3 = find_splicepairs_distant(&ignore_found_score,&nsplicepairs3,&longsinglesplicing3,*hits3,
- donors_plus_3,antidonors_plus_3,acceptors_plus_3,antiacceptors_plus_3,
- donors_minus_3,antidonors_minus_3,acceptors_minus_3,antiacceptors_minus_3,
- localsplicing_penalty,distantsplicing_penalty,
- querylength3,nmismatches,/*first_read_p*/false);
+ distantsplicing3 = find_splicepairs_distant_rna(&ignore_found_score,&nsplicepairs3,&longsinglesplicing3,/*hits*/distantsplicing3,
+ donors_plus_3,antidonors_plus_3,acceptors_plus_3,antiacceptors_plus_3,
+ donors_minus_3,antidonors_minus_3,acceptors_minus_3,antiacceptors_minus_3,
+ localsplicing_penalty,distantsplicing_penalty,
+ querylength3,nmismatches,/*first_read_p*/false);
+ debug(printf("Found %d distant splices on 5' end\n",List_length(distantsplicing3)));
nmismatches++;
}
- if (longsinglesplicing3 != NULL) {
- *hits3 = List_append(*hits3,longsinglesplicing3);
- }
-
/* Clean up 3 */
for (i = 0; i <= max_splice_mismatches_3; i++) {
substringlist_gc(&(donors_plus_3[i]));
@@ -17338,20 +18959,32 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
FREEA(antiacceptors_minus_3);
}
- /* 10. Pairing after distant splicing */
- if (nconcordant == 0) {
- /* Note: cannot use hitarray after we have removed overlapping alignments */
+ /* 11. Pairing after distant splicing using longsinglesplicing */
+
+ if (longsinglesplicing5 != NULL || longsinglesplicing3 != NULL) {
+#if 0
+ /* Note: cannot use hitarray after we have removed overlapping alignments. Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
+ hitarray5[HITARRAY_LONGSINGLESPLICING] = longsinglesplicing5;
+ hitarray3[HITARRAY_LONGSINGLESPLICING] = longsinglesplicing3;
+#else
+ if (longsinglesplicing5 != NULL) {
+ *hits5 = List_append(*hits5,longsinglesplicing5);
+ }
+ if (longsinglesplicing3 != NULL) {
+ *hits3 = List_append(*hits3,longsinglesplicing3);
+ }
+#endif
+ /* Note: cannot use hitarray after we have removed overlapping alignments. Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
+ &(*samechr),&(*conc_transloc),
hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
/*hitarray3*/&(*hits3),/*narray3*/1,
- /*terminals5*/NULL,/*terminals3*/NULL,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
genestrand);
- debug(printf("10> After pairing distant splicing, found %d concordant, %d samechr, found_score %d\n",
+ debug(printf("10> After pairing long single splicing, found %d concordant, %d samechr, found_score %d\n",
nconcordant,nsamechr,*found_score));
if (*abort_pairing_p == false) {
@@ -17366,117 +18999,35 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
}
- }
-
-
- debug13(printf("nconcordant = %d\n",nconcordant));
- /* 11A,B. Terminals */ /* Without nconcordant constraint, speed drops by 1/2 */
- if (nconcordant == 0 && /* nsalvage == 0 && */ *abort_pairing_p == false) {
- /* Previously used found_score > trigger_score_for_terminals */
- debug(printf("Stage 11. nconcordant == 0. Seeing if terminals will help\n"));
-
- if (done_level_5 >= terminal_threshold) {
- max_mismatches_allowed = done_level_5;
- debug(printf("Stage 11A. Finding terminals5, done_level_5 = %d, terminal_threshold = %d\n",
- done_level_5,terminal_threshold));
- if (floors5_computed_p == false) {
- floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
- querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
- indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
- keep_floors_p);
- }
-
- if (segments5_computed_p == false) {
- this5->plus_segments = identify_all_segments_for_terminals(&this5->plus_nsegments,
-#ifdef LARGE_GENOMES
- this5->plus_positions_high,this5->plus_positions_low,
-#else
- this5->plus_positions,
-#endif
- this5->plus_npositions,this5->omitted,querylength5,query5_lastpos,
- floors5,max_mismatches_allowed,/*plusp*/true);
- this5->minus_segments = identify_all_segments_for_terminals(&this5->minus_nsegments,
-#ifdef LARGE_GENOMES
- this5->minus_positions_high,this5->minus_positions_low,
-#else
- this5->minus_positions,
-#endif
- this5->minus_npositions,this5->omitted,querylength5,query5_lastpos,
- floors5,max_mismatches_allowed,/*plusp*/false);
- }
- /* Don't run Stage3end_remove_duplicates until after concordant pairs are found, but do run Stage3end_remove_overlaps */
- terminals5 = find_terminals(this5->plus_segments,this5->plus_nsegments,this5->minus_segments,this5->minus_nsegments,
-#ifdef DEBUG4T
- queryuc_ptr_5,queryrc5,
-#endif
- floors5,querylength5,query5_lastpos,
- query5_compress_fwd,query5_compress_rev,
- max_mismatches_allowed,genestrand,/*first_read_p*/true);
+ /* 11. Pairing after distant splicing using distantsplicing */
#if 0
- *hits5 = Stage3end_remove_overlaps(List_append(*hits5,terminals5),/*finalp*/false);
-#endif
- }
-
- if (done_level_3 >= terminal_threshold) {
- max_mismatches_allowed = done_level_3;
- debug(printf("Stage 11B. Finding terminals3, done_level_3 = %d, terminal_threshold = %d\n",
- done_level_3,terminal_threshold));
-
- if (floors3_computed_p == false) {
- floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
- querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
- indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
- keep_floors_p);
- }
- if (segments3_computed_p == false) {
- this3->plus_segments = identify_all_segments_for_terminals(&this3->plus_nsegments,
-#ifdef LARGE_GENOMES
- this3->plus_positions_high,this3->plus_positions_low,
-#else
- this3->plus_positions,
-#endif
- this3->plus_npositions,this3->omitted,querylength3,query3_lastpos,
- floors3,max_mismatches_allowed,/*plusp*/true);
- this3->minus_segments = identify_all_segments_for_terminals(&this3->minus_nsegments,
-#ifdef LARGE_GENOMES
- this3->minus_positions_high,this3->minus_positions_low,
+ /* Note: cannot use hitarray after we have removed overlapping alignments. Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
+ hitarray5[HITARRAY_DISTANTSPLICING] = distantsplicing5;
+ hitarray3[HITARRAY_DISTANTSPLICING] = distantsplicing3;
#else
- this3->minus_positions,
-#endif
- this3->minus_npositions,this3->omitted,querylength3,query3_lastpos,
- floors3,max_mismatches_allowed,/*plusp*/false);
- }
-
- /* Don't run Stage3end_remove_duplicates until after concordant pairs are found, but do run Stage3end_remove_overlaps */
- terminals3 = find_terminals(this3->plus_segments,this3->plus_nsegments,this3->minus_segments,this3->minus_nsegments,
-#ifdef DEBUG4T
- queryuc_ptr_3,queryrc3,
-#endif
- floors3,querylength3,query3_lastpos,
- query3_compress_fwd,query3_compress_rev,
- max_mismatches_allowed,genestrand,/*first_read_p*/false);
-#if 0
- *hits3 = Stage3end_remove_overlaps(List_append(*hits3,terminals3),/*finalp*/false);
-#endif
+ if (distantsplicing5 != NULL) {
+ *hits5 = List_append(*hits5,distantsplicing5);
}
+ if (distantsplicing3 != NULL) {
+ *hits3 = List_append(*hits3,distantsplicing3);
+ }
+#endif
- if (terminals5 != NULL || terminals3 != NULL) {
- found_terminals_p = true;
- debug4t(printf("Running Stage3_pair_up_concordant\n"));
- /* Note: cannot use hitarray after we have removed overlapping alignments */
+ if (nconcordant == 0 && (distantsplicing5 != NULL || distantsplicing3 != NULL)) {
+ /* Note: cannot use hitarray after we have removed overlapping alignments. Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
+ &(*samechr),&(*conc_transloc),
hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
/*hitarray3*/&(*hits3),/*narray3*/1,
- terminals5,terminals3,
*cutoff_level_5,*cutoff_level_3,subopt_levels,
splicesites,query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
genestrand);
- debug(printf("11> After pairing terminals, found %d concordant, %d samechr, found_score %d\n",
+ debug(printf("11> After pairing distant splicing, found %d concordant, %d samechr, found_score %d\n",
nconcordant,nsamechr,*found_score));
+
if (*abort_pairing_p == false) {
opt_level = (*found_score < opt_level) ? *found_score : opt_level;
if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
@@ -17485,146 +19036,441 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
if ((done_level_3 = opt_level + subopt_levels) > user_maxlevel_3) {
done_level_3 = user_maxlevel_3;
}
- debug(printf("Pairing after 11A and 11B> found_score = %d, opt_level %d, done_level %d,%d\n",
- *found_score,opt_level,done_level_5,done_level_3));
+ debug(printf("10> found_score = %d, opt_level %d, done_level %d,%d\n",*found_score,opt_level,done_level_5,done_level_3));
}
}
}
+ /* Search 8: Terminals */
+ if (nconcordant == 0) {
+ terminals5 = find_terminals(plus_anchor_segments_5,minus_anchor_segments_5,
+ querylength5,query5_lastpos,
+ query5_compress_fwd,query5_compress_rev,
+ /*max_mismatches_allowed*/done_level_5,genestrand,/*first_read_p*/true);
+ *hits5 = List_append(*hits5,terminals5);
-#if 0
- /* 12A,B. Terminals. Not sure why we had a second round of
- terminals. Results on simulated test set are the same without
- this section. */
- if (nconcordant == 0 && nsalvage == 0 && *abort_pairing_p == false) {
- /* Previously used found_score > trigger_score_for_terminals */
- debug(printf("Stage 12. nconcordant == 0. Seeing if terminals 2 will help\n"));
-
- if (end_miss_one < querylength5/2 && done_level_5 >= terminal_threshold) {
- max_mismatches_allowed = done_level_5;
- debug(printf("Stage 12A. Finding terminals5, done_level_5 = %d, terminal_threshold = %d\n",
- done_level_5,terminal_threshold));
- if (floors5_computed_p == false) {
- floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
- querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
- indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
- keep_floors_p);
- }
+ terminals3 = find_terminals(plus_anchor_segments_3,minus_anchor_segments_3,
+ querylength3,query3_lastpos,
+ query3_compress_fwd,query3_compress_rev,
+ /*max_mismatches_allowed*/done_level_3,genestrand,/*first_read_p*/false);
+ *hits3 = List_append(*hits3,terminals3);
- if (segments5_computed_p == false) {
- this5->plus_segments = identify_all_segments_for_terminals(&this5->plus_nsegments,
-#ifdef LARGE_GENOMES
- this5->plus_positions_high,this5->plus_positions_low,
-#else
- this5->plus_positions,
-#endif
- this5->plus_npositions,this5->omitted,querylength5,query5_lastpos,
- floors5,max_mismatches_allowed,/*plusp*/true);
- this5->minus_segments = identify_all_segments_for_terminals(&this5->minus_nsegments,
-#ifdef LARGE_GENOMES
- this5->minus_positions_high,this5->minus_positions_low,
-#else
- this5->minus_positions,
-#endif
- this5->minus_npositions,this5->omitted,querylength5,query5_lastpos,
- floors5,max_mismatches_allowed,/*plusp*/false);
+ hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
+ &(*samechr),&(*conc_transloc),
+ hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
+ /*hitarray3*/&(*hits3),/*narray3*/1,
+ *cutoff_level_5,*cutoff_level_3,subopt_levels,
+ splicesites,query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
+ genestrand);
+ debug(printf("After pairing terminals, found %d concordant, %d nsamechr, found_score %d\n",
+ nconcordant,nsamechr,*found_score));
+ }
+
+
+ /* Search 9: GMAP pairsearch/halfmapping */
+ if (gmap_pairsearch_p == true) {
+ if (*abort_pairing_p == true) {
+ /* Don't do GMAP */
+ gmap5p = gmap3p = false;
+ } else if (nconcordant > 0) {
+ gmap5p = gmap3p = false;
+ } else if (*found_score >= trigger_score_for_gmap) {
+ debug(printf("Test for stage 9: true because found_score %d >= trigger_score_for_gmap %d\n",*found_score,trigger_score_for_gmap));
+ gmap5p = gmap3p = true;
+ } else {
+ gmap5p = gmap3p = false;
+ if (better_free_end_exists_p(greedy5,subs5,terminals5,indels5,singlesplicing5,doublesplicing5,querylength5) == true) {
+ gmap3p = true; /* Do GMAP on other end */
+ }
+ if (better_free_end_exists_p(greedy3,subs3,terminals3,indels3,singlesplicing3,doublesplicing3,querylength3) == true) {
+ gmap5p = true; /* Do GMAP on other end */
}
+ debug(printf("Test for stage 9 using better_free_end_exists_p: gmap5p %d, gmap3p %d\n",gmap5p,gmap3p));
+ }
- /* Don't run Stage3end_remove_duplicates until after concordant pairs are found, but do run Stage3end_remove_overlaps */
- terminals5 = find_terminals(this5->plus_segments,this5->plus_nsegments,this5->minus_segments,this5->minus_nsegments,
-#ifdef DEBUG4T
- queryuc_ptr_5,queryrc5,
+
+ /* 9A,B. GMAP pairsearch/halfmapping/unpaired */
+ /* Our previous test for doing GMAP was if nconcordant == 0, but
+ that could lead to a false positive concordant match. */
+ /* Do not update nconcordant, because poor GMAP alignments can stop search for a distant splice */
+
+ /* Relying upon trigger_score_for_gmap can occasionally lead to faulty concordant alignments. However, running it on everything
+ reduces speed by half */
+
+ /* Go ahead and resolve overlaps on each end by Stage3end, since
+ we cannot do it by Stage3pair, but do not apply optimal
+ score */
+
+ /* Previously did pairsearch only if hits were limited, but that was affected by poor terminals */
+ if (gmap3p == true) {
+ debug(printf("Before remove_overlaps of 5' at cutoff level %d: %d hits\n",*cutoff_level_5,List_length(*hits5)));
+ *hits5 = Stage3end_sort_bymatches(Stage3end_remove_overlaps(*hits5,/*finalp*/false));
+ debug(printf("After remove_overlaps: %d\n",List_length(*hits5)));
+
+ i = 0;
+ best_score_paired = Stage3end_best_score_paired(*hits5);
+ debug13(printf("%d hits on 5' end\n",List_length(*hits5)));
+ debug13(printf("For pairsearch, running GMAP on 3' end to match with 5' ends with score <= score %d\n",
+ best_score_paired));
+ for (p = *hits5; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) {
+ hit5 = (Stage3end_T) List_head(p);
+ if (Stage3end_hittype(hit5) == TRANSLOC_SPLICE) {
+ debug13(printf("No GMAP on transloc splice\n"));
+ } else if (Stage3end_paired_usedp(hit5) == false && Stage3end_score(hit5) <= best_score_paired) {
+ halfmapping3 = align_halfmapping_with_gmap(gmap_history_3,hit5,/*hit3*/NULL,queryseq5,queryseq3,
+ queryuc_ptr_3,/*querylength*/querylength3,query3_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ queryrc3,Shortread_invertedp(queryseq3),
#endif
- floors5,querylength5,query5_lastpos,
- query5_compress_fwd,query5_compress_rev,
- max_mismatches_allowed,genestrand,/*first_read_p*/true);
+ query3_compress_fwd,query3_compress_rev,
+ this3->plus_segments,this3->plus_nsegments,this3->minus_segments,this3->minus_nsegments,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ pairmax,shortsplicedist,user_maxlevel_5,genestrand,
+ /*first_read_p*/false);
+ for (a = halfmapping3; a != NULL; a = List_next(a)) {
+ gmap3 = (Stage3end_T) List_head(a);
+ debug13(printf("=> Successful pairsearch GMAP on hit3 with score %d and nmatches %d\n",
+ Stage3end_score(gmap3),Stage3end_nmatches_posttrim(gmap3)));
#if 0
- *hits5 = Stage3end_remove_overlaps(List_append(*hits5,terminals5),/*finalp*/false);
+ if (Stage3end_score(gmap3) > *cutoff_level_3 + gmap_allowance) {
+ /* nsalvage += 1; */
+ debug13(printf("Score is only %d vs cutoff level %d\n",Stage3end_score(gmap3),*cutoff_level_3));
+ Stage3end_free(&gmap3);
+
+ } else if ((newpair = Stage3pair_new(Stage3end_copy(hit5),gmap3,splicesites,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,genestrand,
+ /*pairtype*/CONCORDANT,localsplicing_penalty,
+ /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
+ debug13(printf( "newpair is NULL\n"));
+ /* Stage3end_free(&gmap3); -- done by Stage3pair_new */
+
+ } else if (Stage3end_hittype(hit5) != TERMINAL) {
+ if (Stage3end_nmatches_posttrim(gmap3) >= querylength3 - (*cutoff_level_3) &&
+ Stage3end_gmap_max_match_length(gmap3) >= querylength3/2) {
+ /* Want high standard for nconcordant, since this precludes finding terminals */
+ nconcordant += 1;
+ debug13(printf("High quality (nmatches %d >= querylength %d - cutoff level %d) => nconcordant %d\n",
+ Stage3end_nmatches_posttrim(gmap3),querylength3,*cutoff_level_3,nconcordant));
+ }
+ hitpairs = List_push(hitpairs,(void *) newpair);
+ } else if (Stage3end_trimlength(hit5) < reject_trimlength) {
+ if (Stage3end_nmatches_posttrim(gmap3) >= querylength3 - (*cutoff_level_3) &&
+ Stage3end_gmap_max_match_length(gmap3) >= querylength3/2) {
+ /* Want high standard for nconcordant, since this precludes finding terminals */
+ nconcordant += 1;
+ debug13(printf("High quality (nmatches %d >= querylength %d - cutoff level %d) => nconcordant %d\n",
+ Stage3end_nmatches_posttrim(gmap3),querylength3,*cutoff_level_3,nconcordant));
+ }
+ hitpairs = List_push(hitpairs,(void *) newpair);
+ } else {
+ /* Stage3end_free(&gmap3); */
+ Stage3pair_free(&newpair);
+ }
+#else
+ if ((newpair = Stage3pair_new(Stage3end_copy(hit5),gmap3,splicesites,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,genestrand,
+ /*pairtype*/CONCORDANT,localsplicing_penalty,
+ /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
+ debug13(printf( "newpair is NULL\n"));
+ /* Stage3end_free(&gmap3); -- done by Stage3pair_new */
+ } else {
+ nconcordant += 1;
+ debug13(printf("New pair => nconcordant %d\n",nconcordant));
+ hitpairs = List_push(hitpairs,(void *) newpair);
+ }
#endif
+ }
+ List_free(&halfmapping3);
+ i++;
+ }
+ }
}
+ if (gmap5p == true) {
+ debug(printf("Before remove_overlaps of 3' at cutoff level %d: %d hits\n",*cutoff_level_3,List_length(*hits3)));
+ *hits3 = Stage3end_sort_bymatches(Stage3end_remove_overlaps(*hits3,/*finalp*/false));
+ debug(printf("After remove_overlaps: %d\n",List_length(*hits3)));
- if (end_miss_one < querylength3/2 && done_level_3 >= terminal_threshold) {
- max_mismatches_allowed = done_level_3;
- debug(printf("Stage 12B. Finding terminals3, done_level_3 = %d, terminal_threshold = %d\n",
- done_level_3,terminal_threshold));
- if (floors3_computed_p == false) {
- floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
- querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
- indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
- keep_floors_p);
- }
- if (segments3_computed_p == false) {
- this3->plus_segments = identify_all_segments_for_terminals(&this3->plus_nsegments,
-#ifdef LARGE_GENOMES
- this3->plus_positions_high,this3->plus_positions_low,
-#else
- this3->plus_positions,
+ i = 0;
+ best_score_paired = Stage3end_best_score_paired(*hits3);
+ debug13(printf("%d hits on 3' end\n",List_length(*hits3)));
+ debug13(printf("For pairsearch, running GMAP on 5' end to match with 3' ends with score <= score %d\n",
+ best_score_paired));
+ for (p = *hits3; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) {
+ hit3 = (Stage3end_T) List_head(p);
+ if (Stage3end_hittype(hit3) == TRANSLOC_SPLICE) {
+ debug13(printf("Not GMAP on transloc splice\n"));
+ } else if (Stage3end_paired_usedp(hit3) == false && Stage3end_score(hit3) <= best_score_paired) {
+ halfmapping5 = align_halfmapping_with_gmap(gmap_history_5,/*hit5*/NULL,hit3,queryseq5,queryseq3,
+ queryuc_ptr_5,/*querylength*/querylength5,query5_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ queryrc5,Shortread_invertedp(queryseq5),
#endif
- this3->plus_npositions,this3->omitted,querylength3,query3_lastpos,
- floors3,max_mismatches_allowed,/*plusp*/true);
- this3->minus_segments = identify_all_segments_for_terminals(&this3->minus_nsegments,
-#ifdef LARGE_GENOMES
- this3->minus_positions_high,this3->minus_positions_low,
+ query5_compress_fwd,query5_compress_rev,
+ this5->plus_segments,this5->plus_nsegments,this5->minus_segments,this5->minus_nsegments,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ pairmax,shortsplicedist,user_maxlevel_5,genestrand,
+ /*first_read_p*/true);
+ for (a = halfmapping5; a != NULL; a = List_next(a)) {
+ gmap5 = (Stage3end_T) List_head(a);
+ debug13(printf("=> Successful pairsearch GMAP on hit5 with score %d and nmatches %d\n",
+ Stage3end_score(gmap5),Stage3end_nmatches_posttrim(gmap5)));
+#if 0
+ /* Stage3end_nmatches_posttrim(gmap5) >= querylength5 - (*cutoff_level_5); */
+ if (Stage3end_score(gmap5) > *cutoff_level_5 + gmap_allowance) {
+ /* nsalvage += 1; */
+ debug13(printf("Score is only %d vs cutoff level %d\n",Stage3end_score(gmap5),*cutoff_level_5));
+ Stage3end_free(&gmap5);
+
+ } else if ((newpair = Stage3pair_new(gmap5,Stage3end_copy(hit3),splicesites,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,genestrand,
+ /*pairtype*/CONCORDANT,localsplicing_penalty,
+ /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
+ debug13(printf( "newpair is NULL\n"));
+ /* Stage3end_free(&gmap5); -- done by Stage3pair_new */
+
+ } else if (Stage3end_hittype(hit3) != TERMINAL) {
+ if (Stage3end_nmatches_posttrim(gmap5) >= querylength5 - (*cutoff_level_5) &&
+ Stage3end_gmap_max_match_length(gmap5) >= querylength5/2) {
+ /* Want high standard for nconcordant, since this precludes finding terminals */
+ nconcordant += 1;
+ debug13(printf("High quality (nmatches %d >= querylength %d - cutoff level %d) => nconcordant %d\n",
+ Stage3end_nmatches_posttrim(gmap5),querylength5,*cutoff_level_5,nconcordant));
+ }
+ hitpairs = List_push(hitpairs,(void *) newpair);
+ } else if (Stage3end_trimlength(hit3) < reject_trimlength) {
+ if (Stage3end_nmatches_posttrim(gmap5) >= querylength5 - (*cutoff_level_5) &&
+ Stage3end_gmap_max_match_length(gmap5) >= querylength5/2) {
+ /* Want high standard for nconcordant, since this precludes finding terminals */
+ nconcordant += 1;
+ debug13(printf("High quality (nmatches %d >= querylength %d - cutoff level %d) => nconcordant %d\n",
+ Stage3end_nmatches_posttrim(gmap5),querylength5,*cutoff_level_5,nconcordant));
+ }
+ hitpairs = List_push(hitpairs,(void *) newpair);
+ } else {
+ /* Stage3end_free(&gmap5); */
+ Stage3pair_free(&newpair);
+ }
#else
- this3->minus_positions,
+ if ((newpair = Stage3pair_new(gmap5,Stage3end_copy(hit3),splicesites,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,genestrand,
+ /*pairtype*/CONCORDANT,localsplicing_penalty,
+ /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
+ debug13(printf( "newpair is NULL\n"));
+ /* Stage3end_free(&gmap5); -- done by Stage3pair_new */
+ } else {
+ nconcordant += 1;
+ debug13(printf("new pair => nconcordant %d\n",nconcordant));
+ hitpairs = List_push(hitpairs,(void *) newpair);
+ }
#endif
- this3->minus_npositions,this3->omitted,querylength3,query3_lastpos,
- floors3,max_mismatches_allowed,/*plusp*/false);
+ }
+ List_free(&halfmapping5);
+ i++;
+ }
}
+ }
+
+ debug(printf("9> After GMAP pairsearch, found %d concordant\n",nconcordant));
+ }
+
- /* Don't run Stage3end_remove_duplicates until after concordant pairs are found, but do run Stage3end_remove_overlaps */
- terminals3 = find_terminals(this3->plus_segments,this3->plus_nsegments,this3->minus_segments,this3->minus_nsegments,
-#ifdef DEBUG4T
- queryuc_ptr_3,queryrc3,
-#endif
- floors3,querylength3,query3_lastpos,
- query3_compress_fwd,query3_compress_rev,
- max_mismatches_allowed,genestrand,/*first_read_p*/false);
#if 0
- *hits3 = Stage3end_remove_overlaps(List_append(*hits3,terminals3),/*finalp*/false);
+ /* Unused code */
+ alloc5p = false;
+ if (knownsplicingp == true && done_level_5 >= localsplicing_penalty) {
+ /* Want >= and not > to give better results. Negligible effect on speed. */
+ /* 8A. Shortend splicing */
+ max_splice_mismatches_5 = done_level_5 - localsplicing_penalty;
+
+ alloc5p = true;
+ donors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+ antidonors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+ acceptors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+ antiacceptors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+ donors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+ antidonors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+ acceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+ antiacceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+
+ if (floors5_computed_p == false) {
+ floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,
+ querylength5,query5_lastpos,plus_indexdb_5,minus_indexdb_5,
+ indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
+ keep_floors_p);
+ floors5_computed_p = true;
+ }
+
+ find_spliceends_shortend(&donors_plus_5,&antidonors_plus_5,&acceptors_plus_5,&antiacceptors_plus_5,
+ plus_anchor_segments_5,
+#ifdef DEBUG4E
+ queryuc_ptr_5,
+#endif
+ floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_fwd,
+ /*max_mismatches_allowed*/max_splice_mismatches_5,/*plusp*/true,genestrand,
+ /*first_read_p*/true);
+
+ find_spliceends_shortend(&antidonors_minus_5,&donors_minus_5,&antiacceptors_minus_5,&acceptors_minus_5,
+ minus_anchor_segments_5,
+#ifdef DEBUG4E
+ /*queryptr*/queryrc5,
#endif
+ floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_rev,
+ /*max_mismatches_allowed*/max_splice_mismatches_5,/*plusp*/false,genestrand,
+ /*first_read_p*/true);
+
+ ignore_found_score = *found_score;
+ singlesplicing5 = find_splicepairs_shortend(&ignore_found_score,/*hits*/singlesplicing5,
+ donors_plus_5,antidonors_plus_5,acceptors_plus_5,antiacceptors_plus_5,
+ donors_minus_5,antidonors_minus_5,acceptors_minus_5,antiacceptors_minus_5,
+ query5_compress_fwd,query5_compress_rev,
+ queryuc_ptr_5,queryrc5,min_shortend,localsplicing_penalty,
+ /*max_mismatches_allowed*/max_splice_mismatches_5,querylength5,
+ /*pairedp*/true,/*first_read_p*/true,genestrand);
+ }
+
+
+ alloc3p = false;
+ if (knownsplicingp == true && done_level_3 >= localsplicing_penalty) {
+ /* Want >= and not > to give better results. Negligible effect on speed. */
+ /* 8B. Short-Overlap splicing */
+ max_splice_mismatches_3 = done_level_3 - localsplicing_penalty;
+
+ alloc3p = true;
+ donors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+ antidonors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+ acceptors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+ antiacceptors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+ donors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+ antidonors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+ acceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+ antiacceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+
+ if (floors3_computed_p == false) {
+ floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,
+ querylength3,query3_lastpos,plus_indexdb_3,minus_indexdb_3,
+ indexdb_size_threshold,max_end_insertions,/*omit_frequent_p*/true,/*omit_repetitive_p*/true,
+ keep_floors_p);
+ floors3_computed_p = true;
}
- if (terminals5 != NULL || terminals3 != NULL) {
- found_terminals_p = true;
- debug4t(printf("Running Stage3_pair_up_concordant\n"));
- /* Cannot use hitarray after we have removed overlapping alignments */
- hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
- &(*samechr),&(*conc_transloc),&(*with_terminal),
- hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
- /*hitarray3*/&(*hits3),/*narray3*/1,
- terminals5,terminals3,*cutoff_level_5,*cutoff_level_3,subopt_levels,
- splicesites,query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,
- querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
- genestrand);
- debug(printf("12> After pairing terminals, found %d concordant, %d samechr, found_score %d\n",
- nconcordant,nsamechr,*found_score));
+ find_spliceends_shortend(&donors_plus_3,&antidonors_plus_3,&acceptors_plus_3,&antiacceptors_plus_3,
+ plus_anchor_segments_3,
+#ifdef DEBUG4E
+ queryuc_ptr_3,
+#endif
+ floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_fwd,
+ /*max_mismatches_allowed*/max_splice_mismatches_3,/*plusp*/true,genestrand,
+ /*first_read_p*/false);
-#if 0
- /* Not needed at end */
- if (*abort_pairing_p == false) {
- opt_level = (*found_score < opt_level) ? *found_score : opt_level;
- if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
- done_level_5 = user_maxlevel_5;
- }
- if ((done_level_3 = opt_level + subopt_levels) > user_maxlevel_3) {
- done_level_3 = user_maxlevel_3;
- }
- debug(printf("Pairing after 12A and 12B> found_score = %d, opt_level %d, done_level %d,%d\n",
- *found_score,opt_level,done_level_5,done_level_3));
+ find_spliceends_shortend(&antidonors_minus_3,&donors_minus_3,&antiacceptors_minus_3,&acceptors_minus_3,
+ minus_anchor_segments_3,
+#ifdef DEBUG4E
+ /*queryptr*/queryrc3,
+#endif
+ floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_rev,
+ /*max_mismatches_allowed*/max_splice_mismatches_3,/*plusp*/false,genestrand,
+ /*first_read_p*/false);
+
+ ignore_found_score = *found_score;
+ singlesplicing3 = find_splicepairs_shortend(&ignore_found_score,/*hits*/singlesplicing3,
+ donors_plus_3,antidonors_plus_3,acceptors_plus_3,antiacceptors_plus_3,
+ donors_minus_3,antidonors_minus_3,acceptors_minus_3,antiacceptors_minus_3,
+ query3_compress_fwd,query3_compress_rev,
+ queryuc_ptr_3,queryrc3,min_shortend,localsplicing_penalty,
+ /*max_mismatches_allowed*/max_splice_mismatches_3,querylength3,
+ /*pairedp*/true,/*first_read_p*/false,genestrand);
+ }
+
+ if (singlesplicing5 != NULL || singlesplicing3 != NULL) {
+ /* 8. Pairing after short-overlaps */
+ hitarray5[HITARRAY_SINGLESPLICING] = singlesplicing5 /* = Stage3end_remove_duplicates(singlesplicing5,queryseq5,queryseq3) */;
+ hitarray3[HITARRAY_SINGLESPLICING] = singlesplicing3 /* = Stage3end_remove_duplicates(singlesplicing3,queryseq5,queryseq3) */;
+ hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
+ &(*samechr),&(*conc_transloc),
+ hitpairs,hitarray5,/*narray5*/HITARRAY_DOUBLESPLICING+1,
+ hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
+ *cutoff_level_5,*cutoff_level_3,subopt_levels,
+ splicesites,query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ querylength5,querylength3,maxpairedpaths,localsplicing_penalty,
+ genestrand);
+ debug(printf("After pairing short-overlap splicing, found %d concordant, %d samechr, found_score %d\n",
+ nconcordant,nsamechr,*found_score));
+ if (*abort_pairing_p == false) {
+ opt_level = (*found_score < opt_level) ? *found_score : opt_level;
+ if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
+ done_level_5 = user_maxlevel_5;
}
-#endif
-
+ if ((done_level_3 = opt_level + subopt_levels) > user_maxlevel_3) {
+ done_level_3 = user_maxlevel_3;
+ }
+ debug(printf("Pairing after 8A and 8B> found_score = %d, opt_level %d, done_level %d,%d\n",
+ *found_score,opt_level,done_level_5,done_level_3));
}
}
-#endif
+ if (alloc5p == true) {
+ /* Clean up 5 */
+ for (i = 0; i <= max_splice_mismatches_5; i++) {
+ substringlist_gc(&(donors_plus_5[i]));
+ substringlist_gc(&(antidonors_plus_5[i]));
+ substringlist_gc(&(acceptors_plus_5[i]));
+ substringlist_gc(&(antiacceptors_plus_5[i]));
+ substringlist_gc(&(donors_minus_5[i]));
+ substringlist_gc(&(antidonors_minus_5[i]));
+ substringlist_gc(&(acceptors_minus_5[i]));
+ substringlist_gc(&(antiacceptors_minus_5[i]));
+ }
+ FREEA(donors_plus_5);
+ FREEA(antidonors_plus_5);
+ FREEA(acceptors_plus_5);
+ FREEA(antiacceptors_plus_5);
+ FREEA(donors_minus_5);
+ FREEA(antidonors_minus_5);
+ FREEA(acceptors_minus_5);
+ FREEA(antiacceptors_minus_5);
+ }
+
+ if (alloc3p == true) {
+ /* Clean up 3 */
+ for (i = 0; i <= max_splice_mismatches_3; i++) {
+ substringlist_gc(&(donors_plus_3[i]));
+ substringlist_gc(&(antidonors_plus_3[i]));
+ substringlist_gc(&(acceptors_plus_3[i]));
+ substringlist_gc(&(antiacceptors_plus_3[i]));
+ substringlist_gc(&(donors_minus_3[i]));
+ substringlist_gc(&(antidonors_minus_3[i]));
+ substringlist_gc(&(acceptors_minus_3[i]));
+ substringlist_gc(&(antiacceptors_minus_3[i]));
+ }
+ FREEA(donors_plus_3);
+ FREEA(antidonors_plus_3);
+ FREEA(acceptors_plus_3);
+ FREEA(antiacceptors_plus_3);
+ FREEA(donors_minus_3);
+ FREEA(antidonors_minus_3);
+ FREEA(acceptors_minus_3);
+ FREEA(antiacceptors_minus_3);
+ }
+#endif
+
+
+#if 0
+ /* Unused code */
+ /* This halfmapping appears to be a duplicate of the previous halfmapping */
debug13(printf("found_terminals_p = %d\n",found_terminals_p));
/* nconcordant might include a concordant pair of terminals */
- if (/* nconcordant == 0 && */ found_terminals_p == true && gmap_terminal_p == true) {
+ if (/* nconcordant == 0 && */ found_terminals_p == true && gmap_pairsearch_p == true) {
/* 13. GMAP terminal */
/* Go ahead and resolve overlaps on each end by Stage3end, since
we cannot do it by Stage3pair, but do not apply optimal
@@ -17644,15 +19490,13 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
#endif
i = 0;
- debug13(printf("%d terminals on 5' end (vs max_gmap_terminal %d)\n",List_length(terminals5),max_gmap_terminal));
- debug13(printf("For terminals, running GMAP on 3' end to match with 5' ends\n"));
- terminals5 = Stage3end_sort_bymatches(terminals5);
- for (p = terminals5; p != NULL && i < max_gmap_terminal; p = List_next(p)) {
+ debug13(printf("%d hits on 5' end (vs max_gmap_pairsearch %d)\n",List_length(*hits5),max_gmap_pairsearch));
+ debug13(printf("For each hit, running GMAP on 3' end to match with 5' ends\n"));
+ *hits5 = Stage3end_sort_bymatches(*hits5);
+ for (p = *hits5; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) {
hit5 = (Stage3end_T) List_head(p);
- debug13(printf("#%d/%d with nmatches %d\n",i,max_gmap_terminal,Stage3end_nmatches_posttrim(hit5)));
- assert(Stage3end_hittype(hit5) == TERMINAL);
- if (1 || Stage3end_hittype(hit5) == TERMINAL /* && Stage3end_paired_usedp(hit5) == false && Stage3end_score(hit5) <= best_score_paired */) {
- gmap3_hits = align_halfmapping_with_gmap(gmap_history_3,hit5,/*hit3*/NULL,queryseq5,queryseq3,
+ debug13(printf("#%d/%d with nmatches %d\n",i,max_gmap_pairsearch,Stage3end_nmatches_posttrim(hit5)));
+ halfmapping3 = align_halfmapping_with_gmap(gmap_history_3,hit5,/*hit3*/NULL,queryseq5,queryseq3,
queryuc_ptr_3,/*querylength*/querylength3,query3_lastpos,
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc3,Shortread_invertedp(queryseq3),
@@ -17663,43 +19507,40 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
pairmax,shortsplicedist,user_maxlevel_3,genestrand,
/*first_read_p*/false);
-
- for (a = gmap3_hits; a != NULL; a = List_next(a)) {
- gmap3 = (Stage3end_T) List_head(a);
- debug13(printf("=> Successful terminal GMAP on hit3 %p with score %d and nmatches %d. Copying hit5 %p\n",
- gmap3,Stage3end_score(gmap3),Stage3end_nmatches_posttrim(gmap3),hit5));
- if (Stage3end_score(gmap3) > *cutoff_level_3 + gmap_allowance) {
- debug13(printf("Score is only %d vs cutoff level %d\n",Stage3end_score(gmap3),*cutoff_level_3));
- Stage3end_free(&gmap3);
- } else if ((newpair = Stage3pair_new(Stage3end_copy(hit5),gmap3,splicesites,
- query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,genestrand,
- /*pairtype*/CONCORDANT,localsplicing_penalty,
- /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
- /* Stage3end_free(&gmap3); -- done by Stage3pair_new */
- } else if (Stage3end_trimlength(hit5) < reject_trimlength) {
- /* Save hit5-gmap3 */
- *with_terminal = List_push(*with_terminal,(void *) newpair);
- } else {
- /* Stage3end_free(&gmap3); */
- Stage3pair_free(&newpair);
- }
+
+ for (a = halfmapping3; a != NULL; a = List_next(a)) {
+ gmap3 = (Stage3end_T) List_head(a);
+ debug13(printf("=> Successful terminal GMAP on hit3 %p with score %d and nmatches %d. Copying hit5 %p\n",
+ gmap3,Stage3end_score(gmap3),Stage3end_nmatches_posttrim(gmap3),hit5));
+ if (Stage3end_score(gmap3) > *cutoff_level_3 + gmap_allowance) {
+ debug13(printf("Score is only %d vs cutoff level %d\n",Stage3end_score(gmap3),*cutoff_level_3));
+ Stage3end_free(&gmap3);
+ } else if ((newpair = Stage3pair_new(Stage3end_copy(hit5),gmap3,splicesites,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,genestrand,
+ /*pairtype*/CONCORDANT,localsplicing_penalty,
+ /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
+ /* Stage3end_free(&gmap3); -- done by Stage3pair_new */
+ } else if (Stage3end_trimlength(hit5) < reject_trimlength) {
+ /* Save hit5-gmap3 */
+ hitpairs = List_push(hitpairs,(void *) newpair);
+ } else {
+ /* Stage3end_free(&gmap3); */
+ Stage3pair_free(&newpair);
}
- List_free(&gmap3_hits);
- i++;
}
+ List_free(&halfmapping3);
+ i++;
}
i = 0;
- debug13(printf("%d terminals on 3' end (vs max_gmap_terminal %d)\n",List_length(terminals3),max_gmap_terminal));
- debug13(printf("For terminals, running GMAP on 5' end to match with 3' ends\n"));
- terminals3 = Stage3end_sort_bymatches(terminals3);
- for (p = terminals3; p != NULL && i < max_gmap_terminal; p = List_next(p)) {
+ debug13(printf("%d hits on 3' end (vs max_gmap_pairsearch %d)\n",List_length(*hits3),max_gmap_pairsearch));
+ debug13(printf("For each hit, running GMAP on 5' end to match with 3' ends\n"));
+ *hits3 = Stage3end_sort_bymatches(*hits3);
+ for (p = *hits3; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) {
hit3 = (Stage3end_T) List_head(p);
- debug13(printf("#%d/%d with nmatches %d\n",i,max_gmap_terminal,Stage3end_nmatches_posttrim(hit3)));
- assert(Stage3end_hittype(hit3) == TERMINAL);
- if (1 || Stage3end_hittype(hit3) == TERMINAL /* && Stage3end_paired_usedp(hit3) == false && Stage3end_score(hit3) <= best_score_paired */) {
- gmap5_hits = align_halfmapping_with_gmap(gmap_history_5,/*hit5*/NULL,hit3,queryseq5,queryseq3,
+ debug13(printf("#%d/%d with nmatches %d\n",i,max_gmap_pairsearch,Stage3end_nmatches_posttrim(hit3)));
+ halfmapping5 = align_halfmapping_with_gmap(gmap_history_5,/*hit5*/NULL,hit3,queryseq5,queryseq3,
queryuc_ptr_5,/*querylength*/querylength5,query5_lastpos,
#ifdef END_KNOWNSPLICING_SHORTCUT
queryrc5,Shortread_invertedp(queryseq5),
@@ -17710,35 +19551,37 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
pairmax,shortsplicedist,user_maxlevel_5,genestrand,
/*first_read_p*/true);
- for (a = gmap5_hits; a != NULL; a = List_next(a)) {
- gmap5 = (Stage3end_T) List_head(a);
- debug13(printf("=> Successful terminal GMAP on hit5 %p with score %d and nmatches %d. Copying hit3 %p\n",
- hit5,Stage3end_score(gmap5),Stage3end_nmatches_posttrim(gmap5),hit3));
- if (Stage3end_score(gmap5) > *cutoff_level_5 + gmap_allowance) {
- debug13(printf("Score is only %d vs cutoff level %d\n",Stage3end_score(gmap5),*cutoff_level_5));
- Stage3end_free(&gmap5);
-
- } else if ((newpair = Stage3pair_new(gmap5,Stage3end_copy(hit3),splicesites,
- query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,genestrand,
- /*pairtype*/CONCORDANT,localsplicing_penalty,
- /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
- /* Stage3end_free(&gmap5); -- done by Stage3pair_new */
- } else if (Stage3end_trimlength(hit3) < reject_trimlength) {
- /* Save gmap5-hit3 */
- *with_terminal = List_push(*with_terminal,(void *) newpair);
- } else {
- /* Stage3end_free(&gmap5); */
- Stage3pair_free(&newpair);
- }
+ for (a = halfmapping5; a != NULL; a = List_next(a)) {
+ gmap5 = (Stage3end_T) List_head(a);
+ debug13(printf("=> Successful terminal GMAP on hit5 %p with score %d and nmatches %d. Copying hit3 %p\n",
+ hit5,Stage3end_score(gmap5),Stage3end_nmatches_posttrim(gmap5),hit3));
+ if (Stage3end_score(gmap5) > *cutoff_level_5 + gmap_allowance) {
+ debug13(printf("Score is only %d vs cutoff level %d\n",Stage3end_score(gmap5),*cutoff_level_5));
+ Stage3end_free(&gmap5);
+
+ } else if ((newpair = Stage3pair_new(gmap5,Stage3end_copy(hit3),splicesites,
+ query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,genestrand,
+ /*pairtype*/CONCORDANT,localsplicing_penalty,
+ /*private5p*/true,/*private3p*/true,/*expect_concordant_p*/true)) == NULL) {
+ /* Stage3end_free(&gmap5); -- done by Stage3pair_new */
+ } else if (Stage3end_trimlength(hit3) < reject_trimlength) {
+ /* Save gmap5-hit3 */
+ hitpairs = List_push(hitpairs,(void *) newpair);
+ } else {
+ /* Stage3end_free(&gmap5); */
+ Stage3pair_free(&newpair);
}
- List_free(&gmap5_hits);
- i++;
}
+ List_free(&halfmapping5);
+ i++;
}
debug(printf("13> After GMAP terminals, found %d concordant\n",nconcordant));
}
+#endif
+
+
if (alloc_floors_p_5 == true) {
Floors_free(&floors5);
@@ -17747,35 +19590,33 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
Floors_free(&floors3);
}
- debug(printf("Ending with %d hitpairs, %d samechr, %d conc_transloc, %d with_terminal\n",
- List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc),List_length(*with_terminal)));
-
- /* Return terminals so they can be freed */
- if (terminals5 != NULL) {
- *hits5 = List_append(*hits5,terminals5);
- }
- if (terminals3 != NULL) {
- *hits3 = List_append(*hits3,terminals3);
- }
+ debug(printf("Ending with %d hitpairs, %d samechr, %d conc_transloc\n",
+ List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc)));
hitpairs = Stage3pair_remove_circular_alias(hitpairs);
#if 0
hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
#endif
+
+ List_free(&plus_anchor_segments_5);
+ List_free(&minus_anchor_segments_5);
+ List_free(&plus_anchor_segments_3);
+ List_free(&minus_anchor_segments_3);
+
return hitpairs;
}
static Pairtype_T
choose_among_paired (int *best_nmatches_paired, int *best_nmatches_5, int *best_nmatches_3,
- List_T hitpairs, List_T samechr, List_T conc_transloc, List_T with_terminal) {
+ List_T hitpairs, List_T samechr, List_T conc_transloc) {
Pairtype_T final_pairtype = UNPAIRED;
List_T p;
Stage3pair_T hitpair;
int nmatches, nmatches5, nmatches3;
- debug16(printf("choose: %d hitpairs, %d conc_transloc, %d with_terminal, %d samechr\n",
- List_length(hitpairs),List_length(conc_transloc),List_length(with_terminal),List_length(samechr)));
+ debug16(printf("choose: %d hitpairs, %d conc_transloc, %d samechr\n",
+ List_length(hitpairs),List_length(conc_transloc),List_length(samechr)));
*best_nmatches_paired = 0;
for (p = hitpairs; p != NULL; p = p->rest) {
@@ -17800,16 +19641,6 @@ choose_among_paired (int *best_nmatches_paired, int *best_nmatches_5, int *best_
}
}
- for (p = with_terminal; p != NULL; p = p->rest) {
- hitpair = (Stage3pair_T) p->first;
- if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
- final_pairtype = CONCORDANT_TERMINAL;
- *best_nmatches_paired = nmatches;
- *best_nmatches_5 = nmatches5;
- *best_nmatches_3 = nmatches3;
- }
- }
-
for (p = samechr; p != NULL; p = p->rest) {
hitpair = (Stage3pair_T) p->first;
if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
@@ -17848,7 +19679,7 @@ best_nmatches_singleend (List_T hits) {
/* Clean up all previous calculations */
static void
-paired_results_free (T this5, T this3, List_T hitpairs, List_T samechr, List_T conc_transloc, List_T with_terminal,
+paired_results_free (T this5, T this3, List_T hitpairs, List_T samechr, List_T conc_transloc,
List_T hits5, List_T hits3, int querylength5, int querylength3) {
List_T p;
Stage3pair_T stage3pair;
@@ -17871,12 +19702,6 @@ paired_results_free (T this5, T this3, List_T hitpairs, List_T samechr, List_T c
}
List_free(&conc_transloc);
- for (p = with_terminal; p != NULL; p = List_next(p)) {
- stage3pair = (Stage3pair_T) List_head(p);
- Stage3pair_free(&stage3pair);
- }
- List_free(&with_terminal);
-
stage3list_gc(&hits3);
stage3list_gc(&hits5);
Stage1_free(&this3,querylength3);
@@ -17894,7 +19719,7 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
Shortread_T queryseq5, char *queryuc_ptr_5, char *queryrc5, char *quality_string_5, int querylength5, int query5_lastpos,
Shortread_T queryseq3, char *queryuc_ptr_3, char *queryrc3, char *quality_string_3, int querylength3, int query3_lastpos,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
- Genome_T genome, Floors_T *floors_array,
+ Floors_T *floors_array,
int user_maxlevel_5, int user_maxlevel_3, int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
@@ -17914,7 +19739,7 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
} else {
singlehits5 = align_end(&cutoff_level_5,gmap_history_5,this5,
query5_compress_fwd,query5_compress_rev,
- queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
+ Shortread_accession(queryseq5),queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
@@ -17929,9 +19754,9 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
} else {
*stage3array5 = (Stage3end_T *) List_to_array_out(singlehits5,NULL); List_free(&singlehits5); /* Return value */
*stage3array5 = Stage3end_eval_and_sort(&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- *stage3array5,maxpaths_search,queryseq5,
+ *stage3array5,maxpaths_search,queryseq5,queryuc_ptr_5,queryrc5,
query5_compress_fwd,query5_compress_rev,
- genome,quality_string_5,/*displayp*/true);
+ quality_string_5,/*displayp*/true);
}
/* Re-align 3' end as a single end */
@@ -17942,7 +19767,7 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
} else {
singlehits3 = align_end(&cutoff_level_3,gmap_history_3,this3,
query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
+ Shortread_accession(queryseq5),queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
@@ -17957,9 +19782,9 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
} else {
*stage3array3 = (Stage3end_T *) List_to_array_out(singlehits3,NULL); List_free(&singlehits3); /* Return value */
*stage3array3 = Stage3end_eval_and_sort(&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- *stage3array3,maxpaths_search,queryseq3,
+ *stage3array3,maxpaths_search,queryseq3,queryuc_ptr_3,queryrc3,
query3_compress_fwd,query3_compress_rev,
- genome,quality_string_3,/*displayp*/true);
+ quality_string_3,/*displayp*/true);
}
return;
@@ -17971,7 +19796,7 @@ static Stage3pair_T *
consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final_pairtype,
Stage3end_T **stage3array5, int *nhits5, int *first_absmq5, int *second_absmq5,
Stage3end_T **stage3array3, int *nhits3, int *first_absmq3, int *second_absmq3,
- List_T hitpairs, List_T samechr, List_T conc_transloc, List_T with_terminal,
+ List_T hitpairs, List_T samechr, List_T conc_transloc,
List_T hits5, List_T hits3, History_T gmap_history_5, History_T gmap_history_3,
Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
@@ -17979,10 +19804,13 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
struct Segment_T **minus_segments_genestrand_5, int *minus_nsegments_genestrand_5,
struct Segment_T **plus_segments_genestrand_3, int *plus_nsegments_genestrand_3,
struct Segment_T **minus_segments_genestrand_3, int *minus_nsegments_genestrand_3,
- Shortread_T queryseq5, char *queryuc_ptr_5, char *quality_string_5, int querylength5, int query5_lastpos,
- Shortread_T queryseq3, char *queryuc_ptr_3, char *quality_string_3, int querylength3, int query3_lastpos,
- Genome_T genome, int cutoff_level_5, int cutoff_level_3,
- int localsplicing_penalty,
+
+ Shortread_T queryseq5, char *queryuc_ptr_5, char *queryrc5,
+ char *quality_string_5, int querylength5, int query5_lastpos,
+ Shortread_T queryseq3, char *queryuc_ptr_3, char *queryrc3,
+ char *quality_string_3, int querylength3, int query3_lastpos,
+
+ int cutoff_level_5, int cutoff_level_3, int localsplicing_penalty,
Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
@@ -17997,7 +19825,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
debug16(printf("Entered consolidate_paired_results. Passing pointer %p\n",&best_nmatches_paired));
*final_pairtype = choose_among_paired(&best_nmatches_paired,&best_nmatches_paired_5,&best_nmatches_paired_3,
- hitpairs,samechr,conc_transloc,with_terminal);
+ hitpairs,samechr,conc_transloc);
if (*final_pairtype == CONCORDANT) {
/* Have concordant results */
@@ -18014,12 +19842,6 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
}
List_free(&conc_transloc);
- for (p = with_terminal; p != NULL; p = List_next(p)) {
- stage3pair = (Stage3pair_T) List_head(p);
- Stage3pair_free(&stage3pair);
- }
- List_free(&with_terminal);
-
if (novelsplicingp || knownsplicingp) {
hitpairs = Stage3pair_remove_excess_terminals(hitpairs);
}
@@ -18066,7 +19888,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
localsplicing_penalty,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- pairmax,user_maxlevel_5,user_maxlevel_3,cutoff_level_5,cutoff_level_3,
+ pairmax,cutoff_level_5,cutoff_level_3,
/*pairtype*/CONCORDANT,/*expect_concordant_p*/true,
/*redo_for_sense_p*/false);
if (Stage3pair_sense_consistent_p(result) == false) {
@@ -18082,7 +19904,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
localsplicing_penalty,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- pairmax,user_maxlevel_5,user_maxlevel_3,cutoff_level_5,cutoff_level_3,
+ pairmax,cutoff_level_5,cutoff_level_3,
/*pairtype*/CONCORDANT,/*expect_concordant_p*/true,
/*redo_for_sense_p*/true);
}
@@ -18114,12 +19936,6 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
}
List_free(&conc_transloc);
- for (p = with_terminal; p != NULL; p = List_next(p)) {
- stage3pair = (Stage3pair_T) List_head(p);
- Stage3pair_free(&stage3pair);
- }
- List_free(&with_terminal);
-
if (gmap_improvement_p == false) {
debug16(printf("No GMAP improvement: Before removing overlaps, %d results\n",List_length(samechr)));
result = Stage3pair_optimal_score(samechr,/*cutoff*/1000000,subopt_levels,
@@ -18157,7 +19973,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
localsplicing_penalty,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- pairmax,user_maxlevel_5,user_maxlevel_3,cutoff_level_5,cutoff_level_3,
+ pairmax,cutoff_level_5,cutoff_level_3,
/*pairtype*/PAIRED_UNSPECIFIED,/*expect_concordant_p*/false,
/*redo_for_sense_p*/false);
result = Stage3pair_optimal_score(result,/*cutoff*/1000000,subopt_levels,
@@ -18194,12 +20010,6 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
}
List_free(&samechr);
- for (p = with_terminal; p != NULL; p = List_next(p)) {
- stage3pair = (Stage3pair_T) List_head(p);
- Stage3pair_free(&stage3pair);
- }
- List_free(&with_terminal);
-
result = Stage3pair_optimal_score(conc_transloc,/*cutoff*/1000000,subopt_levels,
query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
@@ -18247,12 +20057,6 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
}
List_free(&samechr);
- for (p = with_terminal; p != NULL; p = List_next(p)) {
- stage3pair = (Stage3pair_T) List_head(p);
- Stage3pair_free(&stage3pair);
- }
- List_free(&with_terminal);
-
for (p = conc_transloc; p != NULL; p = List_next(p)) {
stage3pair = (Stage3pair_T) List_head(p);
Stage3pair_free(&stage3pair);
@@ -18283,16 +20087,8 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
}
List_free(&conc_transloc);
- if (novelsplicingp || knownsplicingp) {
- with_terminal = Stage3pair_remove_excess_terminals(with_terminal);
- }
-
if (gmap_improvement_p == false) {
- debug16(printf("No GMAP improvement: Before removing overlaps, %d results\n",List_length(with_terminal)));
- result = Stage3pair_optimal_score(with_terminal,/*cutoff*/1000000,subopt_levels,
- query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,
- querylength5,querylength3,/*keep_gmap_p*/true,/*finalp*/true);
+ debug16(printf("No GMAP improvement: Before removing overlaps, %d results\n",List_length(result)));
result = Stage3pair_remove_overlaps(result,/*translocp*/false,/*finalp*/true);
result = Stage3pair_optimal_score(result,/*cutoff*/1000000,subopt_levels,
query5_compress_fwd,query5_compress_rev,
@@ -18303,11 +20099,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
debug16(printf("After removing overlaps, %d results\n",List_length(result)));
} else {
- debug16(printf("GMAP improvement: Before removing overlaps, %d results\n",List_length(with_terminal)));
- result = Stage3pair_optimal_score(with_terminal,/*cutoff*/1000000,subopt_levels,
- query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,
- querylength5,querylength3,/*keep_gmap_p*/true,/*finalp*/false);
+ debug16(printf("GMAP improvement: Before removing overlaps, %d results\n",List_length(results)));
result = Stage3pair_remove_overlaps(result,/*translocp*/false,/*finalp*/false);
result = Stage3pair_optimal_score(result,/*cutoff*/1000000,subopt_levels,
query5_compress_fwd,query5_compress_rev,
@@ -18329,7 +20121,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
localsplicing_penalty,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- pairmax,user_maxlevel_5,user_maxlevel_3,cutoff_level_5,cutoff_level_3,
+ pairmax,cutoff_level_5,cutoff_level_3,
/*pairtype*/CONCORDANT,/*expect_concordant_p*/true,
/*redo_for_sense_p*/false);
if (Stage3pair_sense_consistent_p(result) == false) {
@@ -18345,7 +20137,7 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
localsplicing_penalty,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- pairmax,user_maxlevel_5,user_maxlevel_3,cutoff_level_5,cutoff_level_3,
+ pairmax,cutoff_level_5,cutoff_level_3,
/*pairtype*/CONCORDANT,/*expect_concordant_p*/true,
/*redo_for_sense_p*/true);
}
@@ -18372,12 +20164,6 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
}
List_free(&conc_transloc);
- for (p = with_terminal; p != NULL; p = List_next(p)) {
- stage3pair = (Stage3pair_T) List_head(p);
- Stage3pair_free(&stage3pair);
- }
- List_free(&with_terminal);
-
result = (List_T) NULL;
}
@@ -18431,9 +20217,10 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
Stage3pair_privatize(stage3pairarray,/*npairs*/1);
Stage3pair_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
stage3pairarray,maxpaths_search,queryseq5,queryseq3,
+ queryuc_ptr_5,queryrc5,queryuc_ptr_3,queryrc3,
query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
- genome,quality_string_5,quality_string_3);
+ quality_string_5,quality_string_3);
stage3list_gc(&singlehits3);
stage3list_gc(&singlehits5);
@@ -18477,13 +20264,15 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
*stage3array5 = Stage3end_eval_and_sort_guided(&(*nhits5),&(*first_absmq5),&(*second_absmq5),
/*guide*/(*stage3array3)[0],
*stage3array5,maxpaths_search,queryseq5,
+ queryuc_ptr_5,queryrc5,
query5_compress_fwd,query5_compress_rev,
- genome,quality_string_5,/*displayp*/true);
+ quality_string_5,/*displayp*/true);
} else {
*stage3array5 = Stage3end_eval_and_sort(&(*nhits5),&(*first_absmq5),&(*second_absmq5),
*stage3array5,maxpaths_search,queryseq5,
+ queryuc_ptr_5,queryrc5,
query5_compress_fwd,query5_compress_rev,
- genome,quality_string_5,/*displayp*/true);
+ quality_string_5,/*displayp*/true);
}
}
@@ -18493,13 +20282,15 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
*stage3array3 = Stage3end_eval_and_sort_guided(&(*nhits3),&(*first_absmq3),&(*second_absmq3),
/*guide*/(*stage3array5)[0],
*stage3array3,maxpaths_search,queryseq3,
+ queryuc_ptr_3,queryrc3,
query3_compress_fwd,query3_compress_rev,
- genome,quality_string_3,/*displayp*/true);
+ quality_string_3,/*displayp*/true);
} else {
*stage3array3 = Stage3end_eval_and_sort(&(*nhits3),&(*first_absmq3),&(*second_absmq3),
*stage3array3,maxpaths_search,queryseq3,
+ queryuc_ptr_3,queryrc3,
query3_compress_fwd,query3_compress_rev,
- genome,quality_string_3,/*displayp*/true);
+ quality_string_3,/*displayp*/true);
}
}
debug16(printf("Result is NULL, and we have %d hits on 5' end and %d hits on 3' end\n",*nhits5,*nhits3));
@@ -18516,9 +20307,10 @@ consolidate_paired_results (int *npaths, int *first_absmq, int *second_absmq, Pa
Stage3pair_privatize(stage3pairarray,*npaths);
Stage3pair_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
stage3pairarray,maxpaths_search,queryseq5,queryseq3,
+ queryuc_ptr_5,queryrc5,queryuc_ptr_3,queryrc3,
query5_compress_fwd,query5_compress_rev,
query3_compress_fwd,query3_compress_rev,
- genome,quality_string_5,quality_string_3);
+ quality_string_5,quality_string_3);
stage3list_gc(&hits3);
stage3list_gc(&hits5);
@@ -18536,7 +20328,7 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
Stage3end_T **stage3array3, int *nhits3, int *first_absmq3, int *second_absmq3,
Shortread_T queryseq5, Shortread_T queryseq3,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
- Genome_T genome, Floors_T *floors_array,
+ Floors_T *floors_array,
double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
@@ -18545,8 +20337,7 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Chrpos_T pairmax, bool keep_floors_p) {
Stage3pair_T *stage3pairarray;
- List_T hitpairs = NULL, samechr = NULL, conc_transloc = NULL, with_terminal = NULL,
- hits5 = NULL, hits3 = NULL;
+ List_T hitpairs = NULL, samechr = NULL, conc_transloc = NULL, hits5 = NULL, hits3 = NULL;
T this5, this3;
char *queryuc_ptr_5, *queryuc_ptr_3, *quality_string_5, *quality_string_3;
Compress_T query5_compress_fwd = NULL, query5_compress_rev = NULL, query3_compress_fwd = NULL, query3_compress_rev = NULL;
@@ -18620,50 +20411,39 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
user_maxlevel_3 = 0;
}
- noligos3 = read_oligos(&allvalidp3,this3,queryuc_ptr_3,querylength3,query3_lastpos,/*genestrand*/0,
- /*first_read_p*/false);
- if (noligos3 == 0) {
- debug(printf("Aborting because no hits found anywhere\n"));
- Stage1_free(&this3,querylength3);
+ query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
+ query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
+ gmap_history_3 = History_new();
+ make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
- *npaths = *nhits3 = 0;
- *stage3array3 = (Stage3end_T *) NULL;
- return (Stage3pair_T *) NULL;
+ hits3 = align_end(&cutoff_level_3,gmap_history_3,this3,
+ query3_compress_fwd,query3_compress_rev,
+ Shortread_accession(queryseq5),queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,
+ floors_array,oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ allvalidp3,keep_floors_p,/*genestrand*/0,/*first_read_p*/false);
+ if ((*nhits3 = List_length(hits3)) == 0) {
+ *stage3array3 = (Stage3end_T *) NULL;
} else {
- query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
- query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
- gmap_history_3 = History_new();
- make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
-
- hits3 = align_end(&cutoff_level_3,gmap_history_3,this3,
- query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp3,keep_floors_p,/*genestrand*/0,/*first_read_p*/false);
-
- if ((*nhits3 = List_length(hits3)) == 0) {
- *stage3array3 = (Stage3end_T *) NULL;
- } else {
- *stage3array3 = (Stage3end_T *) List_to_array_out(hits3,NULL); List_free(&hits3); /* Return value */
- *stage3array3 = Stage3end_eval_and_sort(&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- *stage3array3,maxpaths_search,queryseq3,
- query3_compress_fwd,query3_compress_rev,
- genome,quality_string_3,/*displayp*/true);
- }
- *npaths = 0;
- *final_pairtype = UNPAIRED;
- History_free(&gmap_history_3);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this3,querylength3);
- return (Stage3pair_T *) NULL;
+ *stage3array3 = (Stage3end_T *) List_to_array_out(hits3,NULL); List_free(&hits3); /* Return value */
+ *stage3array3 = Stage3end_eval_and_sort(&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ *stage3array3,maxpaths_search,queryseq3,
+ queryuc_ptr_3,queryrc3,
+ query3_compress_fwd,query3_compress_rev,
+ quality_string_3,/*displayp*/true);
}
+ *npaths = 0;
+ *final_pairtype = UNPAIRED;
+ History_free(&gmap_history_3);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this3,querylength3);
+ return (Stage3pair_T *) NULL;
} else if (querylength3 < min_readlength) {
/* Solve just 5' end */
@@ -18690,50 +20470,39 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
user_maxlevel_5 = 0;
}
- noligos5 = read_oligos(&allvalidp5,this5,queryuc_ptr_5,querylength5,query5_lastpos,/*genestrand*/0,
- /*first_read_p*/true);
- if (noligos5 == 0) {
- debug(printf("Aborting because no hits found anywhere\n"));
- Stage1_free(&this5,querylength5);
+ query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
+ query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
+ gmap_history_5 = History_new();
+ make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
- *npaths = *nhits5 = 0;
- *stage3array5 = (Stage3end_T *) NULL;
- return (Stage3pair_T *) NULL;
+ hits5 = align_end(&cutoff_level_5,gmap_history_5,this5,
+ query5_compress_fwd,query5_compress_rev,
+ Shortread_accession(queryseq5),queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,
+ floors_array,oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ user_maxlevel_5,indel_penalty_middle,indel_penalty_end,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ allvalidp5,keep_floors_p,/*genestrand*/0,/*first_read_p*/true);
+ if ((*nhits5 = List_length(hits5)) == 0) {
+ *stage3array5 = (Stage3end_T *) NULL;
} else {
- query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
- query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
- gmap_history_5 = History_new();
- make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
-
- hits5 = align_end(&cutoff_level_5,gmap_history_5,this5,
- query5_compress_fwd,query5_compress_rev,
- queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,keep_floors_p,/*genestrand*/0,/*first_read_p*/true);
-
- if ((*nhits5 = List_length(hits5)) == 0) {
- *stage3array5 = (Stage3end_T *) NULL;
- } else {
- *stage3array5 = (Stage3end_T *) List_to_array_out(hits5,NULL); List_free(&hits5); /* Return value */
- *stage3array5 = Stage3end_eval_and_sort(&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- *stage3array5,maxpaths_search,queryseq5,
- query5_compress_fwd,query5_compress_rev,
- genome,quality_string_5,/*displayp*/true);
- }
- *npaths = 0;
- *final_pairtype = UNPAIRED;
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Stage1_free(&this5,querylength5);
- return (Stage3pair_T *) NULL;
+ *stage3array5 = (Stage3end_T *) List_to_array_out(hits5,NULL); List_free(&hits5); /* Return value */
+ *stage3array5 = Stage3end_eval_and_sort(&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ *stage3array5,maxpaths_search,queryseq5,
+ queryuc_ptr_5,queryrc5,
+ query5_compress_fwd,query5_compress_rev,
+ quality_string_5,/*displayp*/true);
}
+ *npaths = 0;
+ *final_pairtype = UNPAIRED;
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Stage1_free(&this5,querylength5);
+ return (Stage3pair_T *) NULL;
} else {
if (user_maxlevel_float < 0.0) {
@@ -18762,105 +20531,90 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
user_maxlevel_3 = 0;
}
- noligos5 = read_oligos(&allvalidp5,this5,queryuc_ptr_5,querylength5,query5_lastpos,/*genestrand*/0,
- /*first_read_p*/true);
- noligos3 = read_oligos(&allvalidp3,this3,queryuc_ptr_3,querylength3,query3_lastpos,/*genestrand*/0,
- /*first_read_p*/false);
- if (noligos5 == 0 && noligos3 == 0) {
- debug(printf("Aborting because no hits found anywhere\n"));
- Stage1_free(&this3,querylength3);
- Stage1_free(&this5,querylength5);
+ query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
+ query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
+ query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
+ query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
+ gmap_history_5 = History_new();
+ gmap_history_3 = History_new();
+ make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
+ make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
+
+ hitpairs = align_pair(&abort_pairing_p,&found_score,&cutoff_level_5,&cutoff_level_3,
+ &samechr,&conc_transloc,gmap_history_5,gmap_history_3,
+ &hits5,&hits3,this5,this3,query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
+ querylength5,querylength3,query5_lastpos,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ allvalidp5,allvalidp3,pairmax,maxpairedpaths,keep_floors_p,
+ queryseq5,queryseq3,/*genestrand*/0);
+
+ if (abort_pairing_p == true) {
+ debug16(printf("abort_pairing_p is true\n"));
+ paired_results_free(this5,this3,hitpairs,samechr,conc_transloc,
+ hits5,hits3,querylength5,querylength3);
+
+ this5 = Stage1_new(querylength5);
+ this3 = Stage1_new(querylength3);
+ realign_separately(stage3array5,&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ stage3array3,&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ gmap_history_5,gmap_history_3,this5,this3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ keep_floors_p,/*genestrand*/0);
- *npaths = *nhits5 = *nhits3 = 0;
- *stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
+ *npaths = 0;
+ *final_pairtype = UNPAIRED;
+ History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this5,querylength5);
+ Stage1_free(&this3,querylength3);
return (Stage3pair_T *) NULL;
} else {
- query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
- query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
- query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
- query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
- gmap_history_5 = History_new();
- gmap_history_3 = History_new();
- make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
- make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
-
- hitpairs = align_pair(&abort_pairing_p,&found_score,&cutoff_level_5,&cutoff_level_3,
- &samechr,&conc_transloc,&with_terminal,gmap_history_5,gmap_history_3,
- &hits5,&hits3,this5,this3,query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
- querylength5,querylength3,query5_lastpos,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
-
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
-
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,allvalidp3,pairmax,maxpairedpaths,keep_floors_p,
- queryseq5,queryseq3,/*genestrand*/0);
-
- if (abort_pairing_p == true) {
- debug16(printf("abort_pairing_p is true\n"));
- paired_results_free(this5,this3,hitpairs,samechr,conc_transloc,with_terminal,
- hits5,hits3,querylength5,querylength3);
-
- this5 = Stage1_new(querylength5);
- this3 = Stage1_new(querylength3);
- realign_separately(stage3array5,&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- stage3array3,&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- gmap_history_5,gmap_history_3,this5,this3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,genome,floors_array,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- keep_floors_p,/*genestrand*/0);
-
- *npaths = 0;
- *final_pairtype = UNPAIRED;
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this5,querylength5);
- Stage1_free(&this3,querylength3);
- return (Stage3pair_T *) NULL;
-
- } else {
- stage3pairarray =
- consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
- &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- hitpairs,samechr,conc_transloc,with_terminal,hits5,hits3,gmap_history_5,gmap_history_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- &this5->plus_segments,&this5->plus_nsegments,&this5->minus_segments,&this5->minus_nsegments,
- &this3->plus_segments,&this3->plus_nsegments,&this3->minus_segments,&this3->minus_nsegments,
- queryseq5,queryuc_ptr_5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,quality_string_3,querylength3,query3_lastpos,
- genome,cutoff_level_5,cutoff_level_3,
- localsplicing_penalty,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
+ stage3pairarray =
+ consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
+ &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ hitpairs,samechr,conc_transloc,hits5,hits3,gmap_history_5,gmap_history_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ &this5->plus_segments,&this5->plus_nsegments,&this5->minus_segments,&this5->minus_nsegments,
+ &this3->plus_segments,&this3->plus_nsegments,&this3->minus_segments,&this3->minus_nsegments,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ cutoff_level_5,cutoff_level_3,
+ localsplicing_penalty,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this5,querylength5);
- Stage1_free(&this3,querylength3);
- return stage3pairarray;
- }
+ History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this5,querylength5);
+ Stage1_free(&this3,querylength3);
+ return stage3pairarray;
}
}
}
@@ -18872,7 +20626,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
Stage3end_T **stage3array3, int *nhits3, int *first_absmq3, int *second_absmq3,
Shortread_T queryseq5, Shortread_T queryseq3,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
- Genome_T genome, Floors_T *floors_array,
+ Floors_T *floors_array,
double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
@@ -18884,7 +20638,6 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
List_T hitpairs, hitpairs_geneplus = NULL, hitpairs_geneminus = NULL;
List_T samechr, samechr_geneplus = NULL, samechr_geneminus = NULL;
List_T conc_transloc, conc_transloc_geneplus = NULL, conc_transloc_geneminus = NULL;
- List_T with_terminal, with_terminal_geneplus = NULL, with_terminal_geneminus = NULL;
List_T hits5, hits3, hits_geneplus_5 = NULL, hits_geneplus_3 = NULL, hits_geneminus_5 = NULL, hits_geneminus_3 = NULL;
T this_geneplus_5, this_geneplus_3, this_geneminus_5, this_geneminus_3;
char *queryuc_ptr_5, *queryuc_ptr_3, *quality_string_5, *quality_string_3;
@@ -18977,7 +20730,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
} else {
hits_geneplus_3 = align_end(&cutoff_level_3,gmap_history_3,this_geneplus_3,
query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
+ Shortread_accession(queryseq5),queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
indexdb_fwd,indexdb_fwd,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
@@ -18993,7 +20746,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
} else {
hits_geneminus_3 = align_end(&cutoff_level_3,gmap_history_3,this_geneminus_3,
query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
+ Shortread_accession(queryseq5),queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
@@ -19010,8 +20763,9 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
*stage3array3 = (Stage3end_T *) List_to_array_out(hits3,NULL); List_free(&hits3); /* Return value */
*stage3array3 = Stage3end_eval_and_sort(&(*nhits3),&(*first_absmq3),&(*second_absmq3),
*stage3array3,maxpaths_search,queryseq3,
+ queryuc_ptr_3,queryrc3,
query3_compress_fwd,query3_compress_rev,
- genome,quality_string_3,/*displayp*/true);
+ quality_string_3,/*displayp*/true);
}
*npaths = 0;
@@ -19061,7 +20815,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
} else {
hits_geneplus_5 = align_end(&cutoff_level_5,gmap_history_5,this_geneplus_5,
query5_compress_fwd,query5_compress_rev,
- queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
+ Shortread_accession(queryseq5),queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
@@ -19077,7 +20831,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
} else {
hits_geneminus_5 = align_end(&cutoff_level_5,gmap_history_5,this_geneminus_5,
query5_compress_fwd,query5_compress_rev,
- queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
+ Shortread_accession(queryseq5),queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
indexdb_fwd,indexdb_rev,indexdb_size_threshold,
floors_array,oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
@@ -19094,8 +20848,9 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
*stage3array5 = (Stage3end_T *) List_to_array_out(hits5,NULL); List_free(&hits5); /* Return value */
*stage3array5 = Stage3end_eval_and_sort(&(*nhits5),&(*first_absmq5),&(*second_absmq5),
*stage3array5,maxpaths_search,queryseq5,
+ queryuc_ptr_5,queryrc5,
query5_compress_fwd,query5_compress_rev,
- genome,quality_string_5,/*displayp*/true);
+ quality_string_5,/*displayp*/true);
}
*npaths = 0;
@@ -19147,75 +20902,53 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
abort_pairing_p_geneplus = false;
- noligos5 = read_oligos(&allvalidp5,this_geneplus_5,queryuc_ptr_5,querylength5,query5_lastpos,/*genestrand*/+1,
- /*first_read_p*/true);
- noligos3 = read_oligos(&allvalidp3,this_geneplus_3,queryuc_ptr_3,querylength3,query3_lastpos,/*genestrand*/+1,
- /*first_read_p*/false);
-
- if (noligos5 == 0 && noligos3 == 0) {
- debug(printf("Aborting because no hits found anywhere\n"));
- hitpairs_geneplus = (List_T) NULL;
-
- } else {
- hitpairs_geneplus = align_pair(&abort_pairing_p_geneplus,&found_score_geneplus,
- &cutoff_level_5,&cutoff_level_3,
- &samechr_geneplus,&conc_transloc_geneplus,&with_terminal_geneplus,
- gmap_history_5,gmap_history_3,
- &hits_geneplus_5,&hits_geneplus_3,this_geneplus_5,this_geneplus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
- querylength5,querylength3,query5_lastpos,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+ hitpairs_geneplus = align_pair(&abort_pairing_p_geneplus,&found_score_geneplus,
+ &cutoff_level_5,&cutoff_level_3,
+ &samechr_geneplus,&conc_transloc_geneplus,
+ gmap_history_5,gmap_history_3,
+ &hits_geneplus_5,&hits_geneplus_3,this_geneplus_5,this_geneplus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
+ querylength5,querylength3,query5_lastpos,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,allvalidp3,pairmax,maxpairedpaths,keep_floors_p,
- queryseq5,queryseq3,/*genestrand*/+1);
- }
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ allvalidp5,allvalidp3,pairmax,maxpairedpaths,keep_floors_p,
+ queryseq5,queryseq3,/*genestrand*/+1);
abort_pairing_p_geneminus = false;
- noligos5 = read_oligos(&allvalidp5,this_geneminus_5,queryuc_ptr_5,querylength5,query5_lastpos,/*genestrand*/+2,
- /*first_read_p*/true);
- noligos3 = read_oligos(&allvalidp3,this_geneminus_3,queryuc_ptr_3,querylength3,query3_lastpos,/*genestrand*/+2,
- /*first_read_p*/false);
-
- if (noligos5 == 0 && noligos3 == 0) {
- debug(printf("Aborting because no hits found anywhere\n"));
- hitpairs_geneplus = (List_T) NULL;
-
- } else {
- hitpairs_geneminus = align_pair(&abort_pairing_p_geneminus,&found_score_geneminus,
- &cutoff_level_5,&cutoff_level_3,
- &samechr_geneminus,&conc_transloc_geneminus,&with_terminal_geneminus,
- gmap_history_5,gmap_history_3,
- &hits_geneminus_5,&hits_geneminus_3,this_geneminus_5,this_geneminus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
- querylength5,querylength3,query5_lastpos,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+ hitpairs_geneminus = align_pair(&abort_pairing_p_geneminus,&found_score_geneminus,
+ &cutoff_level_5,&cutoff_level_3,
+ &samechr_geneminus,&conc_transloc_geneminus,
+ gmap_history_5,gmap_history_3,
+ &hits_geneminus_5,&hits_geneminus_3,this_geneminus_5,this_geneminus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
+ querylength5,querylength3,query5_lastpos,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,allvalidp3,pairmax,maxpairedpaths,keep_floors_p,
- queryseq5,queryseq3,/*genestrand*/+2);
- }
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ allvalidp5,allvalidp3,pairmax,maxpairedpaths,keep_floors_p,
+ queryseq5,queryseq3,/*genestrand*/+2);
if (found_score_geneplus < found_score_geneminus) {
paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
- with_terminal_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
+ hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
if (abort_pairing_p_geneplus == true) {
debug16(printf("abort_pairing_p_geneplus is true\n"));
paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
- with_terminal_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
+ hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
this_geneplus_5 = Stage1_new(querylength5);
this_geneplus_3 = Stage1_new(querylength3);
@@ -19225,7 +20958,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,genome,floors_array,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -19260,14 +20993,14 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
&(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,with_terminal_geneplus,
+ hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
hits_geneplus_5,hits_geneplus_3,gmap_history_5,gmap_history_3,
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
plus_segments_genestrand_5,plus_nsegments_genestrand_5,minus_segments_genestrand_5,minus_nsegments_genestrand_5,
plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
- queryseq5,queryuc_ptr_5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,quality_string_3,querylength3,query3_lastpos,
- genome,cutoff_level_5,cutoff_level_3,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ cutoff_level_5,cutoff_level_3,
localsplicing_penalty,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
@@ -19284,12 +21017,12 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
} else if (found_score_geneminus < found_score_geneplus) {
paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
- with_terminal_geneplus,hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
+ hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
if (abort_pairing_p_geneminus == true) {
debug16(printf("abort_pairing_p_geneminus is true\n"));
paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
- with_terminal_geneminus,hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
+ hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
this_geneminus_5 = Stage1_new(querylength5);
this_geneminus_3 = Stage1_new(querylength3);
@@ -19299,7 +21032,7 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,genome,floors_array,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
@@ -19334,14 +21067,14 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
&(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,with_terminal_geneminus,
+ hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
hits_geneminus_5,hits_geneminus_3,gmap_history_5,gmap_history_3,
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
plus_segments_genestrand_5,plus_nsegments_genestrand_5,minus_segments_genestrand_5,minus_nsegments_genestrand_5,
plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
- queryseq5,queryuc_ptr_5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,quality_string_3,querylength3,query3_lastpos,
- genome,cutoff_level_5,cutoff_level_3,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ cutoff_level_5,cutoff_level_3,
localsplicing_penalty,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
@@ -19360,7 +21093,6 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
hitpairs = List_append(hitpairs_geneplus,hitpairs_geneminus);
samechr = List_append(samechr_geneplus,samechr_geneminus);
conc_transloc = List_append(conc_transloc_geneplus,conc_transloc_geneminus);
- with_terminal = List_append(with_terminal_geneplus,with_terminal_geneminus);
hits5 = List_append(hits_geneplus_5,hits_geneminus_5);
hits3 = List_append(hits_geneplus_3,hits_geneminus_3);
@@ -19388,13 +21120,13 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
&(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- hitpairs,samechr,conc_transloc,with_terminal,hits5,hits3,gmap_history_5,gmap_history_3,
+ hitpairs,samechr,conc_transloc,hits5,hits3,gmap_history_5,gmap_history_3,
query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
plus_segments_genestrand_5,plus_nsegments_genestrand_5,minus_segments_genestrand_5,minus_nsegments_genestrand_5,
plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
- queryseq5,queryuc_ptr_5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,quality_string_3,querylength3,query3_lastpos,
- genome,cutoff_level_5,cutoff_level_3,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ cutoff_level_5,cutoff_level_3,
localsplicing_penalty,
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
@@ -19420,7 +21152,7 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
Stage3end_T **stage3array3, int *nhits3, int *first_absmq3, int *second_absmq3,
Shortread_T queryseq5, Shortread_T queryseq3,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
- Genome_T genome, Floors_T *floors_array,
+ Floors_T *floors_array,
double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
@@ -19434,7 +21166,7 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
&(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
queryseq5,queryseq3,indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- genome,floors_array,user_maxlevel_float,indel_penalty_middle,indel_penalty_end,
+ floors_array,user_maxlevel_float,indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
oligoindices_major,oligoindices_minor,
@@ -19445,7 +21177,7 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
&(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
queryseq5,queryseq3,indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- genome,floors_array,user_maxlevel_float,indel_penalty_middle,indel_penalty_end,
+ floors_array,user_maxlevel_float,indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
oligoindices_major,oligoindices_minor,
@@ -19469,14 +21201,15 @@ Stage1hr_cleanup () {
void
Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_in, int index1interval_in,
int spansize_in, Univ_IIT_T chromosome_iit_in, int nchromosomes_in,
- Genome_T genomealt, Mode_T mode_in, int maxpaths_search_in,
+ Genome_T genome_in, Genome_T genomealt, Mode_T mode_in, int maxpaths_search_in,
int terminal_threshold_in, int reject_trimlength_in,
Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
Chrpos_T *splicedists_in, int nsplicesites_in,
- bool novelsplicingp_in, bool knownsplicingp_in, bool distances_observed_p_in,
- int subopt_levels_in, Chrpos_T max_middle_insertions_in, Chrpos_T max_middle_deletions_in,
+ bool novelsplicingp_in, bool knownsplicingp_in, bool find_dna_chimeras_p_in,
+ bool distances_observed_p_in, int subopt_levels_in,
+ Chrpos_T max_middle_insertions_in, Chrpos_T max_middle_deletions_in,
Chrpos_T shortsplicedist_in, Chrpos_T shortsplicedist_known_in, Chrpos_T shortsplicedist_novelend_in,
Chrpos_T min_intronlength_in,
@@ -19485,7 +21218,7 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
int nullgap_in, int maxpeelback_in, int maxpeelback_distalmedial_in,
int extramaterial_end_in, int extramaterial_paired_in,
int gmap_mode, int trigger_score_for_gmap_in, int gmap_allowance_in,
- int max_gmap_pairsearch_in, int max_gmap_terminal_in,
+ int max_gmap_pairsearch_in, int max_gmap_segments_in,
int max_gmap_improvement_in, int antistranded_penalty_in) {
bool gmapp = false;
@@ -19501,6 +21234,7 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
chromosome_iit = chromosome_iit_in;
circular_typeint = Univ_IIT_typeint(chromosome_iit,"circular");
nchromosomes = nchromosomes_in;
+ genome = genome_in;
if (use_only_sarray_p == false) {
Univ_IIT_intervals_setup(&chroffsets,&chrhighs,&chrlengths,chromosome_iit,nchromosomes,circular_typeint);
@@ -19532,6 +21266,7 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
novelsplicingp = novelsplicingp_in;
knownsplicingp = knownsplicingp_in;
+ find_dna_chimeras_p = find_dna_chimeras_p_in;
distances_observed_p = distances_observed_p_in;
subopt_levels = subopt_levels_in;
@@ -19560,9 +21295,9 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
extramaterial_end = extramaterial_end_in;
extramaterial_paired = extramaterial_paired_in;
+ gmap_segments_p = false;
gmap_pairsearch_p = false;
gmap_indel_knownsplice_p = false;
- gmap_terminal_p = false;
gmap_improvement_p = false;
fprintf(stderr,"GMAP modes:");
@@ -19590,8 +21325,8 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
} else {
gmapp = true;
}
- fprintf(stderr," terminal");
- gmap_terminal_p = true;
+ fprintf(stderr," segments");
+ gmap_segments_p = true;
}
if ((gmap_mode & GMAP_IMPROVEMENT) != 0) {
if (gmapp == true) {
@@ -19612,7 +21347,7 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
gmap_allowance = gmap_allowance_in;
max_gmap_pairsearch = max_gmap_pairsearch_in;
- max_gmap_terminal = max_gmap_terminal_in;
+ max_gmap_segments = max_gmap_segments_in;
max_gmap_improvement = max_gmap_improvement_in;
antistranded_penalty = antistranded_penalty_in;
diff --git a/src/stage1hr.h b/src/stage1hr.h
index 4e0be6f..f79e5a5 100644
--- a/src/stage1hr.h
+++ b/src/stage1hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage1hr.h 154778 2014-12-06 03:32:33Z twu $ */
+/* $Id: stage1hr.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef STAGE1HR_INCLUDED
#define STAGE1HR_INCLUDED
@@ -58,7 +58,7 @@ Stage1_free (T *old, int querylength);
extern Stage3end_T *
Stage1_single_read (int *npaths, int *first_absmq, int *second_absmq,
Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
- int indexdb_size_threshold, Genome_T genome, Floors_T *floors_array,
+ int indexdb_size_threshold, Floors_T *floors_array,
double usermax_level_float, int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
@@ -73,7 +73,7 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
Stage3end_T **stage3array3, int *nhits3, int *first_absmq3, int *second_absmq3,
Shortread_T queryseq5, Shortread_T queryseq3,
Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, int indexdb_size_threshold,
- Genome_T genome, Floors_T *floors_array,
+ Floors_T *floors_array,
double usermax_level_float, int indel_penalty_middle, int indel_penalty_end,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
@@ -88,14 +88,15 @@ Stage1hr_cleanup ();
extern void
Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_in, int index1interval_in,
int spansize_in, Univ_IIT_T chromosome_iit_in, int nchromosomes_in,
- Genome_T genomealt, Mode_T mode_in, int maxpaths_search_in,
+ Genome_T genome_in, Genome_T genomealt, Mode_T mode_in, int maxpaths_search_in,
int terminal_threshold_in, int reject_trimlength,
Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
Chrpos_T *splicedists_in, int nsplicesites_in,
- bool novelsplicingp_in, bool knownsplicingp_in, bool distances_observed_p_in,
- int subopt_levels_in, Chrpos_T max_middle_insertions_in, Chrpos_T max_middle_deletions_in,
+ bool novelsplicingp_in, bool knownsplicingp_in, bool find_dna_chimeras_p_in,
+ bool distances_observed_p_in, int subopt_levels_in,
+ Chrpos_T max_middle_insertions_in, Chrpos_T max_middle_deletions_in,
Chrpos_T shortsplicedist_in, Chrpos_T shortsplicedist_known_in, Chrpos_T shortsplicedist_novelend_in,
Chrpos_T min_intronlength_in,
diff --git a/src/stage2.c b/src/stage2.c
index 86af87c..acbc11c 100644
--- a/src/stage2.c
+++ b/src/stage2.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage2.c 156846 2015-01-16 01:53:19Z twu $";
+static char rcsid[] = "$Id: stage2.c 166741 2015-06-02 01:24:48Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -116,10 +116,15 @@ static int suboptimal_score_end;
static int suboptimal_score_start;
static Mode_T mode;
static bool snps_p;
+static int sufflookback;
+static int nsufflookback;
+static int maxintronlen;
+
void
Stage2_setup (bool splicingp_in, bool cross_species_p,
int suboptimal_score_start_in, int suboptimal_score_end_in,
+ int sufflookback_in, int nsufflookback_in, int maxintronlen_in,
Mode_T mode_in, bool snps_p_in) {
splicingp = splicingp_in;
if (splicingp == true) {
@@ -134,6 +139,11 @@ Stage2_setup (bool splicingp_in, bool cross_species_p,
}
suboptimal_score_start = suboptimal_score_start_in;
suboptimal_score_end = suboptimal_score_end_in;
+
+ sufflookback = sufflookback_in;
+ nsufflookback = nsufflookback_in;
+ maxintronlen = maxintronlen_in;
+
mode = mode_in;
snps_p = snps_p_in;
return;
@@ -775,7 +785,7 @@ score_querypos_lookback_one (
struct Link_T **links, Chrpos_T **mappings,
int **active, int *firstactive,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
- int indexsize, Intlist_T processed, int sufflookback, int nsufflookback, int maxintronlen,
+ int indexsize, Intlist_T processed,
bool anchoredp, bool localp, bool splicingp, bool skip_repetitive_p,
bool use_canonical_p, int non_canonical_penalty) {
Link_T prevlink;
@@ -1150,7 +1160,7 @@ score_querypos_lookback_mult (
struct Link_T **links, Chrpos_T **mappings,
int **active, int *firstactive,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
- int indexsize, Intlist_T processed, int sufflookback, int nsufflookback, int maxintronlen,
+ int indexsize, Intlist_T processed,
bool anchoredp, bool localp, bool splicingp, bool skip_repetitive_p,
bool use_canonical_p, int non_canonical_penalty) {
Link_T prevlink, currlink;
@@ -1646,7 +1656,7 @@ score_querypos_lookforward_one (
struct Link_T **links, Chrpos_T **mappings,
int **active, int *firstactive,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
- int indexsize, Intlist_T processed, int sufflookback, int nsufflookback, int maxintronlen,
+ int indexsize, Intlist_T processed,
bool anchoredp, bool localp, bool splicingp, bool skip_repetitive_p,
bool use_canonical_p, int non_canonical_penalty) {
Link_T prevlink;
@@ -2013,7 +2023,7 @@ score_querypos_lookforward_mult (
struct Link_T **links, Chrpos_T **mappings,
int **active, int *firstactive,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
- int indexsize, Intlist_T processed, int sufflookback, int nsufflookback, int maxintronlen,
+ int indexsize, Intlist_T processed,
bool anchoredp, bool localp, bool splicingp, bool skip_repetitive_p,
bool use_canonical_p, int non_canonical_penalty) {
Link_T prevlink, currlink;
@@ -2507,8 +2517,10 @@ revise_active_lookback (int **active, int *firstactive, int *nactive,
debug6(printf("Revising querypos %d from low_hit %d to high_hit %d. Scores:\n",querypos,low_hit,high_hit));
if ((hit = low_hit) >= high_hit) {
+ debug6(printf("1. Initializing firstactive for querypos %d to be -1\n",querypos));
firstactive[querypos] = -1;
nactive[querypos] = 0;
+
} else {
debug6(printf("At hit %d, fwd_score is %d",hit,links[querypos][hit].fwd_score));
best_score = links[querypos][hit].fwd_score;
@@ -2584,6 +2596,7 @@ revise_active_lookforward (int **active, int *firstactive, int *nactive,
debug6(printf("Revising querypos %d from high_hit %d to low_hit %d. Scores:\n",querypos,high_hit,low_hit));
if ((hit = high_hit - 1) < low_hit) {
+ debug6(printf("2. Initializing firstactive for querypos %d to be -1\n",querypos));
firstactive[querypos] = -1;
nactive[querypos] = 0;
} else {
@@ -3158,7 +3171,7 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
- int indexsize, int sufflookback, int nsufflookback, int maxintronlen,
+ int indexsize,
#ifdef DEBUG9
char *queryseq_ptr,
#endif
@@ -3218,10 +3231,12 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
/* Initialize */
for (querypos = 0; querypos < querystart; querypos++) {
+ debug6(printf("3. Initializing firstactive for querypos %d to be -1\n",querypos));
firstactive[querypos] = -1;
nactive[querypos] = 0;
}
while (querypos <= queryend && npositions[querypos] <= 0) {
+ debug6(printf("4. Initializing firstactive for querypos %d to be -1\n",querypos));
debug9(printf("Skipping querypos %d which has no positions\n",querypos));
firstactive[querypos] = -1;
nactive[querypos] = 0;
@@ -3359,7 +3374,7 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
position = mappings[querypos][low_hit];
debug9(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
- debug9(printf("Finding link looking back at querypos %d,%d at %ux%d (%s). prev_querypos was %d\n",
+ debug9(printf("Finding link looking back from querypos %d,%d at %ux%d (%s). prev_querypos was %d\n",
querypos,low_hit,position,active[querypos][low_hit],oligo,processed ? Intlist_head(processed) : -1));
score_querypos_lookback_one(
@@ -3368,7 +3383,7 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
#endif
currlink,querypos,querystart,queryend,position,
links,mappings,active,firstactive,chroffset,chrhigh,plusp,
- indexsize,processed,sufflookback,nsufflookback,maxintronlen,
+ indexsize,processed,
anchoredp,localp,splicingp,skip_repetitive_p,use_canonical_p,
non_canonical_penalty);
@@ -3380,7 +3395,7 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
} else {
debug9(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
- debug9(printf("Finding links looking back at querypos %d,%d..%d at (%u..%u) (%s). prev_querypos was %d\n",
+ debug9(printf("Finding links looking back from querypos %d,%d..%d at (%u..%u) (%s). prev_querypos was %d\n",
querypos,low_hit,high_hit-1,mappings[querypos][low_hit],mappings[querypos][high_hit-1],
oligo,processed ? Intlist_head(processed) : -1));
@@ -3391,7 +3406,7 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
low_hit,high_hit,querypos,querystart,queryend,
/*positions*/&(mappings[querypos][low_hit]),
links,mappings,active,firstactive,chroffset,chrhigh,plusp,
- indexsize,processed,sufflookback,nsufflookback,maxintronlen,
+ indexsize,processed,
anchoredp,localp,splicingp,skip_repetitive_p,use_canonical_p,
non_canonical_penalty);
@@ -3518,6 +3533,7 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
querypos = next_querypos;
}
}
+ debug9(printf("End of loop lookback\n"));
Intlist_free(&processed);
@@ -3736,6 +3752,7 @@ traceback_one_snps (int querypos, int hit, struct Link_T **links, Chrpos_T **map
links[querypos][hit].fwd_tracei,links[querypos][hit].fwd_intronnfwd,links[querypos][hit].fwd_intronnrev,
links[querypos][hit].fwd_intronnunk));
debug0(printf("\n"));
+
#ifdef SEPARATE_FWD_REV
} else {
debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d",
@@ -3777,7 +3794,7 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
int *firstactive, int *nactive, Cellpool_T cellpool,
char *queryseq_ptr, char *queryuc_ptr, int querylength, int querystart, int queryend,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
- int indexsize, int sufflookback, int nsufflookback, int maxintronlen, Pairpool_T pairpool,
+ int indexsize, Pairpool_T pairpool,
bool anchoredp, int anchor_querypos, Chrpos_T anchor_position,
bool localp, bool skip_repetitive_p, bool use_canonical_p, int non_canonical_penalty,
bool favor_right_p, int max_nalignments, bool debug_graphic_p) {
@@ -3809,7 +3826,7 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
chroffset,chrhigh,plusp,
- indexsize,sufflookback,nsufflookback,maxintronlen,
+ indexsize,
#ifdef DEBUG9
queryseq_ptr,
#endif
@@ -3922,7 +3939,7 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
int *firstactive, int *nactive, Cellpool_T cellpool,
int querystart, int queryend, int querylength,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
- int indexsize, int sufflookback, int nsufflookback, int maxintronlen,
+ int indexsize,
#ifdef DEBUG9
char *queryseq_ptr,
#endif
@@ -3983,10 +4000,12 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
/* Initialize */
for (querypos = querylength - 1; querypos > queryend; querypos--) {
+ debug6(printf("5. Initializing firstactive for querypos %d to be -1\n",querypos));
firstactive[querypos] = -1;
nactive[querypos] = 0;
}
while (querypos >= querystart && npositions[querypos] <= 0) {
+ debug6(printf("6. Initializing firstactive for querypos %d to be -1\n",querypos));
debug9(printf("Skipping querypos %d which has no positions\n",querypos));
firstactive[querypos] = -1;
nactive[querypos] = 0;
@@ -4124,7 +4143,7 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
position = mappings[querypos][low_hit];
debug9(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
- debug9(printf("Finding link looking forward at querypos %d,%d at %ux%d (%s). prev_querypos was %d\n",
+ debug9(printf("Finding link looking forward from querypos %d,%d at %ux%d (%s). prev_querypos was %d\n",
querypos,low_hit,position,active[querypos][low_hit],oligo,processed ? Intlist_head(processed) : -1));
score_querypos_lookforward_one(
#ifdef DEBUG9
@@ -4133,7 +4152,7 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
currlink,querypos,querystart,queryend,position,
links,mappings,active,firstactive,
chroffset,chrhigh,plusp,
- indexsize,processed,sufflookback,nsufflookback,maxintronlen,
+ indexsize,processed,
anchoredp,localp,splicingp,skip_repetitive_p,use_canonical_p,
non_canonical_penalty);
@@ -4145,7 +4164,7 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
} else {
debug9(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
- debug9(printf("Finding links looking forward at querypos %d,%d..%d at (%u..%u) (%s). prev_querypos was %d\n",
+ debug9(printf("Finding links looking forward from querypos %d,%d..%d at (%u..%u) (%s). prev_querypos was %d\n",
querypos,high_hit-1,low_hit,mappings[querypos][high_hit-1],mappings[querypos][low_hit],
oligo,processed ? Intlist_head(processed) : -1));
@@ -4156,7 +4175,7 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
low_hit,high_hit,querypos,querystart,queryend,
/*positions*/&(mappings[querypos][low_hit]),
links,mappings,active,firstactive,chroffset,chrhigh,plusp,
- indexsize,processed,sufflookback,nsufflookback,maxintronlen,
+ indexsize,processed,
anchoredp,localp,splicingp,skip_repetitive_p,use_canonical_p,
non_canonical_penalty);
@@ -4283,6 +4302,8 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
querypos = next_querypos;
}
}
+ debug9(printf("End of loop lookforward\n"));
+
Intlist_free(&processed);
@@ -4332,7 +4353,7 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
char *queryseq_ptr, char *queryuc_ptr, int querylength, int querystart, int queryend,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
- int indexsize, int sufflookback, int nsufflookback, int maxintronlen, Pairpool_T pairpool,
+ int indexsize, Pairpool_T pairpool,
bool anchoredp, int anchor_querypos, Chrpos_T anchor_position,
bool localp, bool skip_repetitive_p, bool use_canonical_p, int non_canonical_penalty,
bool favor_right_p, int max_nalignments, bool debug_graphic_p) {
@@ -4363,7 +4384,7 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
chroffset,chrhigh,plusp,
- indexsize,sufflookback,nsufflookback,maxintronlen,
+ indexsize,
#ifdef DEBUG9
queryseq_ptr,
#endif
@@ -5060,7 +5081,7 @@ Stage2_scan (int *stage2_source, char *queryuc_ptr, int querylength,
Chrpos_T chrstart, Chrpos_T chrend,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
int genestrand, Stage2_alloc_T stage2_alloc, Oligoindex_array_T oligoindices,
- Diagpool_T diagpool, bool debug_graphic_p, bool diagnosticp) {
+ Diagpool_T diagpool, bool debug_graphic_p) {
int ncovered;
int source;
int indexsize;
@@ -5132,12 +5153,14 @@ Stage2_scan (int *stage2_source, char *queryuc_ptr, int querylength,
if (plusp == true) {
Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
/*mappingend*/chroffset+chrend,/*plusp*/true,
- queryuc_ptr,querylength,/*chrpos*/chrstart,genestrand);
+ queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
+ /*chrpos*/chrstart,genestrand);
} else {
/* Need to add 1 to mappingend to cover same range as plusp */
Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
/*mappingend*/chroffset+chrend+1,/*plusp*/false,
- queryuc_ptr,querylength,/*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
+ queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
+ /*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
}
#ifdef EXTRACT_GENOMICSEG
@@ -5150,7 +5173,8 @@ Stage2_scan (int *stage2_source, char *queryuc_ptr, int querylength,
diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
&oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
- querylength,chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
+ /*querystart*/0,/*queryend*/querylength,querylength,
+ chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
pct_coverage = Diag_update_coverage(coveredp,&ncovered,diagonals,querylength);
debug(printf("Stage2_scan: source = %d, ncovered = %d, pct_coverage = %f\n",source,ncovered,pct_coverage));
@@ -5195,8 +5219,8 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
#endif
Oligoindex_array_T oligoindices, double proceed_pctcoverage,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen, bool localp, bool skip_repetitive_p,
- bool favor_right_p, int max_nalignments, bool debug_graphic_p, bool diagnosticp,
+ bool localp, bool skip_repetitive_p,
+ bool favor_right_p, int max_nalignments, bool debug_graphic_p,
Stopwatch_T stopwatch, bool diag_debug) {
List_T all_stage2results = NULL, all_paths, all_ends, all_starts, end_paths, start_paths, path, pairs, p, q;
List_T middle;
@@ -5216,7 +5240,8 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
/* double diag_runtime; */
List_T diagonals;
int anchor_querypos, querystart, queryend;
- Chrpos_T anchor_position;
+ Univcoord_T mappingstart, mappingend;
+ Chrpos_T anchor_position, chrpos, mappinglength;
#ifndef USE_DIAGPOOL
List_T p;
@@ -5284,6 +5309,50 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
#endif
Cellpool_reset(cellpool);
diagonals = (List_T) NULL;
+
+
+#ifdef GSNAP
+ mappingstart = chroffset + chrstart;
+ if (plusp == true) {
+ mappingend = chroffset + chrend;
+ chrpos = chrstart;
+ } else {
+ mappingend = chroffset + chrend + 1;
+ chrpos = (chrhigh - chroffset) - chrend;
+ }
+ mappinglength = (Chrpos_T) (mappingend - mappingstart);
+
+ if (mappinglength > 100000) {
+ /* 9-mers */
+ source = 0;
+ } else if (mappinglength > 10000) {
+ /* 8-mers */
+ source = 1;
+ } else {
+ /* 7-mers */
+ source = 2;
+ }
+
+ oligoindex = Oligoindex_array_elt(oligoindices,source);
+ indexsize = Oligoindex_indexsize(oligoindex); /* Different sources can have different indexsizes */
+ /* printf("indexsize = %d\n",indexsize); */
+
+
+#ifdef PMAP
+ Oligoindex_pmap_tally(oligoindex,mappingstart,mappingend,plusp,
+ queryuc_ptr,querylength,chrpos);
+#else
+ Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,plusp,
+ queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
+ chrpos,genestrand);
+#endif
+
+ diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
+ &oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
+ /*querystart*/0,/*queryend*/querylength,querylength,
+ chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
+#else
+
while (source < Oligoindex_array_length(oligoindices) && pct_coverage < SUFF_PCTCOVERAGE_OLIGOINDEX) {
oligoindex = Oligoindex_array_elt(oligoindices,source);
indexsize = Oligoindex_indexsize(oligoindex); /* Different sources can have different indexsizes */
@@ -5322,11 +5391,13 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
if (plusp == true) {
Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
/*mappingend*/chroffset+chrend,/*plusp*/true,
- queryuc_ptr,querylength,/*chrpos*/chrstart,genestrand);
+ queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
+ /*chrpos*/chrstart,genestrand);
} else {
Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
/*mappingend*/chroffset+chrend+1,/*plusp*/false,
- queryuc_ptr,querylength,/*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
+ queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
+ /*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
}
#ifdef EXTRACT_GENOMICSEG
@@ -5340,13 +5411,17 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
&oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
- querylength,chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
+ /*querystart*/0,/*queryend*/querylength,querylength,
+ chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
pct_coverage = Diag_update_coverage(coveredp,&ncovered,diagonals,querylength);
debug(printf("Stage2_compute: source = %d, ndiagonals = %d, ncovered = %d, pct_coverage = %f\n",
source,List_length(diagonals),ncovered,pct_coverage));
source++;
}
+
+#endif
+
*stage2_source = source;
*stage2_indexsize = indexsize;
#ifdef PMAP
@@ -5367,11 +5442,13 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
debug(printf("Quitting because totalpositions is zero\n"));
middle = (List_T) NULL;
+#ifndef GSNAP
} else if (querylength > 150 && pct_coverage < proceed_pctcoverage && ncovered < SUFF_NCOVERED) {
/* Filter only on long queries */
debug(printf("Quitting because querylength %d > 150, and pct_coverage is only %f < %f, and ncovered is only %d < %d, maxnconsecutive = %d\n",
querylength,pct_coverage,proceed_pctcoverage,ncovered,SUFF_NCOVERED,maxnconsecutive));
middle = (List_T) NULL;
+#endif
} else {
debug(printf("Proceeding because maxnconsecutive is %d and pct_coverage is %f > %f or ncovered = %d > %d\n",
@@ -5399,7 +5476,7 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
queryseq_ptr,queryuc_ptr,querylength,
/*querystart*/diag_querystart,/*queryend*/diag_queryend,
chroffset,chrhigh,plusp,
- indexsize,sufflookback,nsufflookback,maxintronlen,pairpool,
+ indexsize,pairpool,
/*anchoredp*/false,/*anchor_querypos*/0,/*anchor_position*/0,
localp,skip_repetitive_p,use_canonical_middle_p,NON_CANONICAL_PENALTY_MIDDLE,
favor_right_p,max_nalignments,debug_graphic_p);
@@ -5444,7 +5521,7 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
queryseq_ptr,queryuc_ptr,querylength,querystart,queryend,
chroffset,chrhigh,plusp,
- indexsize,sufflookback,nsufflookback,maxintronlen,pairpool,
+ indexsize,pairpool,
/*anchoredp*/true,anchor_querypos,anchor_position,
localp,skip_repetitive_p,use_canonical_ends_p,NON_CANONICAL_PENALTY_ENDS,
favor_right_p,max_nalignments,debug_graphic_p);
@@ -5523,7 +5600,7 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
queryseq_ptr,queryuc_ptr,querylength,querystart,queryend,
chroffset,chrhigh,plusp,
- indexsize,sufflookback,nsufflookback,maxintronlen,pairpool,
+ indexsize,pairpool,
/*anchoredp*/true,anchor_querypos,anchor_position,
localp,skip_repetitive_p,use_canonical_ends_p,NON_CANONICAL_PENALTY_ENDS,
favor_right_p,max_nalignments,debug_graphic_p);
@@ -5649,9 +5726,9 @@ Stage2_compute_one (int *stage2_source, int *stage2_indexsize,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int genestrand,
Oligoindex_array_T oligoindices, double proceed_pctcoverage,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen, bool localp,
+ bool localp,
bool skip_repetitive_p, bool use_shifted_canonical_p,
- bool favor_right_p, bool debug_graphic_p, bool diagnosticp) {
+ bool favor_right_p, bool debug_graphic_p) {
List_T pairs, all_paths;
List_T middle, path;
int indexsize, indexsize_nt;
@@ -5709,18 +5786,21 @@ Stage2_compute_one (int *stage2_source, int *stage2_indexsize,
if (plusp == true) {
Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
/*mappingend*/chroffset+chrend,/*plusp*/true,
- queryuc_ptr,querylength,/*chrpos*/chrstart,genestrand);
+ queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
+ /*chrpos*/chrstart,genestrand);
} else {
Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
/*mappingend*/chroffset+chrend+1,/*plusp*/false,
- queryuc_ptr,querylength,/*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
+ queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
+ /*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
}
#endif
diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
&oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
- querylength,chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
+ /*querstart*/0,/*queryend*/querylength,querylength,
+ chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
pct_coverage = Diag_update_coverage(coveredp,&ncovered,diagonals,querylength);
debug(printf("Stage2_compute: source = %d, ncovered = %d, pct_coverage = %f\n",source,ncovered,pct_coverage));
@@ -5751,7 +5831,7 @@ Stage2_compute_one (int *stage2_source, int *stage2_indexsize,
queryseq_ptr,queryuc_ptr,querylength,
/*querystart*/0,/*queryend*/querylength-1,
chroffset,chrhigh,plusp,
- indexsize,sufflookback,nsufflookback,maxintronlen,pairpool,
+ indexsize,pairpool,
/*anchoredp*/false,/*anchor_querypos*/0,/*anchor_position*/0,
localp,skip_repetitive_p,use_canonical_middle_p,NON_CANONICAL_PENALTY_MIDDLE,
favor_right_p,/*max_nalignments*/1,debug_graphic_p)) == NULL) {
diff --git a/src/stage2.h b/src/stage2.h
index 2823b7a..c11d1c5 100644
--- a/src/stage2.h
+++ b/src/stage2.h
@@ -1,4 +1,4 @@
-/* $Id: stage2.h 146625 2014-09-02 21:33:41Z twu $ */
+/* $Id: stage2.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef STAGE2_INCLUDED
#define STAGE2_INCLUDED
@@ -39,7 +39,8 @@ Stage2_free (T *old);
extern void
Stage2_setup (bool splicingp_in, bool cross_species_p,
- int suboptimal_score_start_in, int suboptimal_score_end_in,
+ int suboptimal_score_start_in, int suboptimal_score_end_in,
+ int sufflookback_in, int nsufflookback_in, int maxintronlen_in,
Mode_T mode_in, bool snps_p_in);
extern void
@@ -50,7 +51,7 @@ Stage2_scan (int *stage2_source, char *queryuc_ptr, int querylength,
Chrpos_T chrstart, Chrpos_T chrend,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
int genestrand, Stage2_alloc_T stage2_alloc, Oligoindex_array_T oligoindices,
- Diagpool_T diagpool, bool debug_graphic_p, bool diagnosticp);
+ Diagpool_T diagpool, bool debug_graphic_p);
extern List_T
Stage2_compute (int *stage2_source, int *stage2_indexsize,
@@ -62,8 +63,8 @@ Stage2_compute (int *stage2_source, int *stage2_indexsize,
#endif
Oligoindex_array_T oligoindices, double proceed_pctcoverage,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen, bool localp, bool skip_repetitive_p,
- bool favor_right_p, int max_nalignments, bool debug_graphic_p, bool diagnosticp,
+ bool localp, bool skip_repetitive_p,
+ bool favor_right_p, int max_nalignments, bool debug_graphic_p,
Stopwatch_T stopwatch, bool diag_debug);
extern List_T
@@ -73,8 +74,8 @@ Stage2_compute_one (int *stage2_source, int *stage2_indexsize,
Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int genestrand,
Oligoindex_array_T oligoindices, double proceed_pctcoverage,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen, bool localp, bool skip_repetitive_p,
- bool use_shifted_canonical_p, bool favor_right_p, bool debug_graphic_p, bool diagnosticp);
+ bool localp, bool skip_repetitive_p,
+ bool use_shifted_canonical_p, bool favor_right_p, bool debug_graphic_p);
#undef T
#endif
diff --git a/src/stage3.c b/src/stage3.c
index a0b0d3d..71b2126 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 160004 2015-03-03 02:08:27Z twu $";
+static char rcsid[] = "$Id: stage3.c 166984 2015-06-07 02:59:20Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -89,7 +89,7 @@ static char rcsid[] = "$Id: stage3.c 160004 2015-03-03 02:08:27Z twu $";
#define SCORE_SIGDIFF 5
#define PROB_SIGDIFF 0.5
-#define END_SPLICESITE_SEARCH 4
+#define END_SPLICESITE_SEARCH 10
#define END_SPLICESITE_PROB 0.95
#define END_SPLICESITE_EXON_LENGTH 100 /* If shorter than this, then don't look for end splice site */
@@ -295,6 +295,16 @@ static int min_intronlength;
static int max_deletionlength;
static int min_indel_end_matches;
+static int maxpeelback_distalmedial;
+static int nullgap;
+static int extramaterial_end;
+static int extramaterial_paired;
+static int extraband_single;
+static int extraband_end;
+static int extraband_paired;
+static int ngap;
+static int maxintronlen;
+
static bool maximize_coverage_p = false;
static bool output_sam_p;
static Stage3debug_T stage3debug;
@@ -307,6 +317,10 @@ Stage3_setup (bool splicingp_in, bool novelsplicingp_in, bool require_splicedir_
int donor_typeint_in, int acceptor_typeint_in,
Univcoord_T *splicesites_in,
int min_intronlength_in, int max_deletionlength_in, int min_indel_end_matches_in,
+ int maxpeelback_distalmedial_in, int nullgap_in,
+ int extramaterial_end_in, int extramaterial_paired_in,
+ int extraband_single_in, int extraband_end_in, int extraband_paired_in,
+ int ngap_in, int maxintronlen_in,
bool output_sam_p_in, bool homopolymerp_in, Stage3debug_T stage3debug_in) {
splicingp = splicingp_in;
novelsplicingp = novelsplicingp_in;
@@ -323,6 +337,16 @@ Stage3_setup (bool splicingp_in, bool novelsplicingp_in, bool require_splicedir_
max_deletionlength = max_deletionlength_in;
min_indel_end_matches = min_indel_end_matches_in;
+ maxpeelback_distalmedial = maxpeelback_distalmedial_in;
+ nullgap = nullgap_in;
+ extramaterial_end = extramaterial_end_in;
+ extramaterial_paired = extramaterial_paired_in;
+ extraband_single = extraband_single_in;
+ extraband_end = extraband_end_in;
+ extraband_paired = extraband_paired_in;
+ ngap = ngap_in;
+ maxintronlen = maxintronlen_in;
+
output_sam_p = output_sam_p_in;
homopolymerp = homopolymerp_in;
stage3debug = stage3debug_in;
@@ -365,6 +389,8 @@ struct T {
bool chimera_left_p; /* Part of a chimera on its querystart end */
bool chimera_right_p; /* Part of a chimera on its queryend end */
int npairs;
+ List_T cigar_tokens; /* Needed for SAM output */
+ bool intronp;
List_T pairs; /* Winning set of pairs */
@@ -932,6 +958,7 @@ Stage3_overlap (T x, T y) {
*/
+#if 0
static List_T
check_gaps (List_T pairs, Pairpool_T pairpool) {
List_T path = NULL, pairptr;
@@ -1067,6 +1094,7 @@ check_gaps (List_T pairs, Pairpool_T pairpool) {
return path;
}
+#endif
static char complCode[128] = COMPLEMENT_LC;
@@ -1650,7 +1678,7 @@ assign_gap_types (List_T path, int cdna_direction, bool watsonp, char *queryseq_
static List_T
-assign_intron_probs (List_T path, int cdna_direction, bool watsonp, char *queryseq_ptr,
+assign_intron_probs (List_T path, int cdna_direction, bool watsonp,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
Pairpool_T pairpool) {
List_T pairs = NULL, pairptr;
@@ -2306,7 +2334,7 @@ add_intron (List_T pairs, Univcoord_T chroffset, Univcoord_T chrhigh,
static List_T
fix_adjacent_indels (List_T pairs) {
List_T path = NULL, pairptr;
- Pair_T prev, this = NULL, pair;
+ Pair_T this = NULL, pair;
bool in_exon = false;
int Mlength = 0, Ilength = 0, Dlength = 0;
char last_token_type = ' ';
@@ -2315,7 +2343,6 @@ fix_adjacent_indels (List_T pairs) {
debug4(printf("Starting fix_adjacent_indels: "));
while (pairs != NULL) {
- prev = this;
this = (Pair_T) List_head(pairs);
if (this->gapp) {
@@ -2532,6 +2559,7 @@ fix_adjacent_indels (List_T pairs) {
#define DELETION_STATE -1
+#if 0
static List_T
remove_adjacent_ins_del (bool *foundp, List_T pairs) {
List_T path = NULL, pairptr;
@@ -2616,6 +2644,7 @@ remove_adjacent_ins_del (bool *foundp, List_T pairs) {
return path;
}
+#endif
@@ -3270,6 +3299,7 @@ trim_short_end3_exons (bool *trim3p, List_T path,
+#if 0
static bool
dualbreak_p (List_T pairs) {
Pair_T pair;
@@ -3285,8 +3315,10 @@ dualbreak_p (List_T pairs) {
return false;
}
+#endif
+#if 0
static int
dualbreak_distance_from_end (int *npairs, int *totaljump, List_T pairs) {
Pair_T pair;
@@ -3328,8 +3360,10 @@ dualbreak_distance_from_end (int *npairs, int *totaljump, List_T pairs) {
return nmatches - nmismatches;
}
+#endif
+#if 0
static List_T
trim_npairs (List_T pairs, int npairs) {
int i;
@@ -3340,8 +3374,10 @@ trim_npairs (List_T pairs, int npairs) {
}
return pairs;
}
+#endif
+#if 0
static bool
enough_matches (int matches, int genomejump
#if 0
@@ -3380,6 +3416,7 @@ enough_matches (int matches, int genomejump
}
#endif
}
+#endif
static bool
@@ -3494,18 +3531,18 @@ exon_length_3 (List_T path) {
/* Also handles case where novelsplicingp == false */
/* pairs -> pairs */
static List_T
-trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, int paired_favor_mode, int zero_offset,
- int querylength, int watsonp, int cdna_direction, int maxintronlen
+trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs,
+ int cdna_direction
#ifdef WASTE
, Pairpool_T pairpool
#endif
) {
List_T path, exon, pairptr, p;
- Pair_T pair, medial, indel = NULL, splice = NULL;
- int max_nmatches, max_nmismatches;
+ Pair_T pair, medial, splice = NULL, gappair;
+ int max_nmatches = 0, max_nmismatches;
int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
int max_score, score;
- bool nearindelp = false, nearmismatchp = false, is_canonical;
+ bool nearindelp = false;
double medial_prob;
int nindels;
@@ -3546,6 +3583,7 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, int pai
debug3(Pair_dump_list(exon,true));
+ max_nmatches = max_nmismatches = 0;
nmatches = nmismatches = 0;
max_score = score = 0;
/* Skip the intron gap */
@@ -3567,8 +3605,12 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, int pai
pair->querypos,score,max_nmatches,max_nmismatches));
}
+ gappair = (Pair_T) List_head(exon);
+ debug3(printf("Gap pair is "));
+ debug3(Pair_dump_one(gappair,true));
+ debug3(printf("\n"));
- if (pair->comp == INDEL_COMP) {
+ if (gappair->comp == INDEL_COMP) {
/* Handle end indel */
/* indel = pair; */
@@ -3589,13 +3631,12 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, int pai
/* Skip */
} else {
debug3(printf("Saw mismatch %c medial to 5' end indel\n",medial->comp));
- nearmismatchp = true;
}
}
} else {
/* Handle end exon */
- splice = pair;
+ splice = gappair;
for (p = pairs, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
medial = (Pair_T) p->first;
@@ -3606,7 +3647,6 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, int pai
nearindelp = true;
} else {
debug3(printf("Saw mismatch %c medial to 5' end intron\n",medial->comp));
- nearmismatchp = true;
}
}
@@ -3623,7 +3663,6 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, int pai
nearindelp = true;
} else {
debug3(printf("Saw mismatch %c distal to 5' end intron\n",distal->comp));
- nearmismatchp = true;
}
}
}
@@ -3712,10 +3751,13 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, int pai
*trim5p = true;
#endif
+#if 0
} else if (max_score < 12) {
+ /* This eliminates ambig end information */
debug3(printf("max_score %d < 12, so trimming it\n",max_score));
path = (List_T) NULL;
*trim5p = true;
+#endif
} else if (sufficient_splice_prob_local(List_length(exon),max_nmismatches,
/*distal_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob,
@@ -3726,7 +3768,7 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, int pai
*trim5p = false;
} else {
- debug3(printf("Fall through (bad probabilities): trimming noncanonical 5' exon\n"));
+ debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 5' exon\n",splice->donor_prob,splice->acceptor_prob));
medial_prob = (cdna_direction >= 0) ? splice->acceptor_prob : splice->donor_prob;
if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true &&
@@ -3762,19 +3804,18 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, int pai
/* Also handles case where novelsplicingp == false */
/* path -> path */
static List_T
-trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, int paired_favor_mode, int zero_offset,
- int querylength, bool watsonp, int cdna_direction,
- int maxintronlen
+trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
+ int cdna_direction
#ifdef WASTE
, Pairpool_T pairpool
#endif
) {
List_T pairs, exon, pairptr, p;
- Pair_T pair, medial, indel = NULL, splice = NULL;
- int max_nmatches, max_nmismatches;
+ Pair_T pair, medial, splice = NULL, gappair;
+ int max_nmatches = 0, max_nmismatches;
int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
int max_score, score;
- bool nearindelp = false, nearmismatchp = false, is_canonical;
+ bool nearindelp = false;
double medial_prob;
int nindels;
@@ -3815,6 +3856,7 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, int pair
debug3(Pair_dump_list(exon,true));
+ max_nmatches = max_nmismatches = 0;
nmatches = nmismatches = 0;
max_score = score = 0;
/* Skip the intron gap */
@@ -3836,7 +3878,12 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, int pair
pair->querypos,score,max_nmatches,max_nmismatches));
}
- if (pair->comp == INDEL_COMP) {
+ gappair = (Pair_T) List_head(exon);
+ debug3(printf("Gap pair is "));
+ debug3(Pair_dump_one(gappair,true));
+ debug3(printf("\n"));
+
+ if (gappair->comp == INDEL_COMP) {
/* Handle end indel */
/* indel = pair; */
@@ -3857,13 +3904,12 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, int pair
/* Skip */
} else {
debug3(printf("Saw mismatch medial %c to 3' end indel\n",medial->comp));
- nearmismatchp = true;
}
}
} else {
/* Handle end exon */
- splice = pair;
+ splice = gappair;
for (p = path, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
medial = (Pair_T) p->first;
@@ -3874,7 +3920,6 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, int pair
nearindelp = true;
} else {
debug3(printf("Saw mismatch medial %c to 3' end intron\n",medial->comp));
- nearmismatchp = true;
}
}
@@ -3891,7 +3936,6 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, int pair
nearindelp = true;
} else {
debug3(printf("Saw mismatch %c distal to 3' end intron\n",distal->comp));
- nearmismatchp = true;
}
}
}
@@ -3980,10 +4024,13 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, int pair
*trim3p = true;
#endif
+#if 0
} else if (max_score < 12) {
+ /* This eliminates ambig end information */
debug3(printf("max_score %d < 12, so trimming it\n",max_score));
pairs = (List_T) NULL;
*trim3p = true;
+#endif
} else if (sufficient_splice_prob_local(List_length(exon),max_nmismatches,
/*distal_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob,
@@ -3994,7 +4041,7 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, int pair
*trim3p = false;
} else {
- debug3(printf("Fall through (bad probabilities): trimming noncanonical 3' exon\n"));
+ debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 3' exon\n",splice->donor_prob,splice->acceptor_prob));
medial_prob = (cdna_direction >= 0) ? splice->donor_prob : splice->acceptor_prob;
if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true &&
@@ -4032,8 +4079,8 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, int pair
static List_T
fill_in_gaps (List_T path, Pairpool_T pairpool, char *queryseq_ptr,
Univcoord_T chroffset, Univcoord_T chrhigh,
- int cdna_direction, int sensedir, bool watsonp, int ngap) {
- List_T pairs = NULL, pairptr;
+ int cdna_direction, bool watsonp, int ngap) {
+ List_T pairs = NULL;
Pair_T pair, leftpair, rightpair;
int leftquerypos, leftgenomepos, rightquerypos, rightgenomepos,
@@ -4090,7 +4137,7 @@ fill_in_gaps (List_T path, Pairpool_T pairpool, char *queryseq_ptr,
path = Pairpool_pop(path,&pair);
} else {
- pairptr = path; /* save */
+ /* pairptr = path; -- save */
path = Pairpool_pop(path,&pair);
/* Discard gap; do not push */
@@ -4270,10 +4317,10 @@ Stage3_free_pairarray (T *old) {
/* Does not alter pairs, except for adding subseq_offset to querypos,
in case we need to re-compute alignment for chimera */
static struct Pair_T *
-make_pairarray (int *npairs, List_T *pairs, int cdna_direction, int sensedir, bool watsonp,
+make_pairarray (int *npairs, List_T *pairs, int cdna_direction, bool watsonp,
Pairpool_T pairpool, char *queryseq_ptr,
Univcoord_T chroffset, Univcoord_T chrhigh,
- int ngap, int subseq_offset, int skiplength, bool diagnosticp) {
+ int ngap, int subseq_offset, int skiplength) {
struct Pair_T *pairarray;
List_T printpairs, printpath, path, p;
Pair_T oldpair, newpair;
@@ -4283,7 +4330,7 @@ make_pairarray (int *npairs, List_T *pairs, int cdna_direction, int sensedir, bo
printpath = List_reverse(printpairs);
printpairs = fill_in_gaps(printpath,pairpool,queryseq_ptr,
- chroffset,chrhigh,cdna_direction,sensedir,watsonp,ngap);
+ chroffset,chrhigh,cdna_direction,watsonp,ngap);
if (subseq_offset != 0) {
path = List_reverse(*pairs);
@@ -4327,11 +4374,10 @@ make_pairarray (int *npairs, List_T *pairs, int cdna_direction, int sensedir, bo
/* Does not alter pairs, except for adding subseq_offset to querypos,
in case we need to re-compute alignment for chimera */
static bool
-make_pairarray_merge (T this_left, int cdna_direction, int sensedir, bool watsonp,
+make_pairarray_merge (T this_left, int cdna_direction, bool watsonp,
Pairpool_T pairpool, char *queryseq_ptr,
Univcoord_T chroffset, Univcoord_T chrhigh,
- int ngap, int subseq_offset, int skiplength, bool diagnosticp,
- bool new_gap_p) {
+ int ngap, int subseq_offset, int skiplength, bool new_gap_p) {
struct Pair_T *pairarray, *pairarray_save;
List_T printpairs, printpath, path, p;
Pair_T oldpair, newpair;
@@ -4355,7 +4401,7 @@ make_pairarray_merge (T this_left, int cdna_direction, int sensedir, bool watson
printpath = List_reverse(printpairs);
printpairs = fill_in_gaps(printpath,pairpool,queryseq_ptr,
- chroffset,chrhigh,cdna_direction,sensedir,watsonp,ngap);
+ chroffset,chrhigh,cdna_direction,watsonp,ngap);
if (List_length(printpairs) == 0) {
this_left->pairarray = pairarray_save;
@@ -4437,13 +4483,13 @@ make_pairarrays_chimera (T this_left, T this_right,
printpath_left = List_reverse(printpairs_left);
printpairs_left = fill_in_gaps(printpath_left,pairpool,queryseq_ptr,
this_left->chroffset,this_left->chrhigh,
- this_left->cdna_direction,this_left->sensedir,this_left->watsonp,ngap);
+ this_left->cdna_direction,this_left->watsonp,ngap);
printpairs_right = Pairpool_copy(this_right->pairs,pairpool);
printpath_right = List_reverse(printpairs_right);
printpairs_right = fill_in_gaps(printpath_right,pairpool,queryseq_ptr,
this_right->chroffset,this_right->chrhigh,
- this_right->cdna_direction,this_right->sensedir,this_right->watsonp,ngap);
+ this_right->cdna_direction,this_right->watsonp,ngap);
/* Do not use subseq_offset or skiplength for chimeras, since we are
@@ -4554,7 +4600,7 @@ Stage3_recompute_coverage (List_T stage3list, Sequence_T queryseq) {
T stage3;
Pair_T start, end;
int querypos1, querypos2;
- int trim_start, trim_end, effective_trim_start, effective_trim_end, skiplength;
+ int trim_start, trim_end, skiplength;
trim_start = Sequence_trim_start(queryseq);
trim_end = Sequence_trim_end(queryseq);
@@ -4568,6 +4614,7 @@ Stage3_recompute_coverage (List_T stage3list, Sequence_T queryseq) {
querypos1 = start->querypos;
querypos2 = end->querypos;
+#if 0
if (querypos2 + 1 > trim_end) {
effective_trim_end = querypos2 + 1;
} else {
@@ -4578,6 +4625,7 @@ Stage3_recompute_coverage (List_T stage3list, Sequence_T queryseq) {
} else {
effective_trim_start = trim_start;
}
+#endif
stage3->trimmed_coverage = (double) (querypos2 - querypos1 + 1)/(double) (trim_end - trim_start + skiplength);
}
@@ -4599,7 +4647,7 @@ pick_cdna_direction (int *winning_cdna_direction, int *sensedir,
int nmatches_fwd, int nmismatches_fwd, int nmatches_rev, int nmismatches_rev, int nindels_fwd, int nindels_rev,
int indel_alignment_score_fwd, int indel_alignment_score_rev,
#endif
- int alignment_score_fwd, int alignment_score_rev, int sense_filter) {
+ int sense_filter) {
#if 0
int canonical_score_fwd, canonical_score_rev;
#endif
@@ -4764,6 +4812,8 @@ pick_cdna_direction (int *winning_cdna_direction, int *sensedir,
return pairs_fwd;
}
+ debug11(printf("max_intron_score_fwd = %f, max_intron_score_rev = %f\n",max_intron_score_fwd,max_intron_score_rev));
+
if (*winning_cdna_direction == +1) {
if (ncanonical_fwd == 0 && nsemicanonical_fwd == 0 && nnoncanonical_fwd == 0) {
*sensedir = SENSE_NULL;
@@ -4899,11 +4949,35 @@ Stage3_new (struct Pair_T *pairarray, List_T pairs, int npairs, int goodness, in
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength,
bool watsonp, int querylength, int skiplength, int trimlength, double stage3_runtime,
int straintype, char *strain, IIT_T altstrain_iit) {
- T new = (T) MALLOC(sizeof(*new));
+ T new;
Pair_T start, end;
int *typematches, nmatches;
int alias;
+ List_T cigar_tokens;
+ bool intronp;
+ int hardclip_start, hardclip_end;
+
+ start = &(pairarray[0]);
+ end = &(pairarray[npairs-1]);
+ hardclip_start = start->querypos;
+ hardclip_end = (querylength - 1) - end->querypos;
+
+ cigar_tokens = Pair_compute_cigar(&intronp,&hardclip_start,&hardclip_end,pairarray,npairs,querylength,
+ watsonp,sensedir,/*chimera_part*/0);
+ if (Pair_tokens_cigarlength(cigar_tokens) + hardclip_start + hardclip_end != querylength) {
+ fprintf(stderr,"Could not compute a valid cigar from the following alignment: %d + %d + %d != %d\n",
+ Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+ Pair_dump_array_stderr(pairarray,npairs,/*zerobasedp*/true);
+ Pair_tokens_free(&cigar_tokens);
+ return (T) NULL;
+
+ } else {
+ new = (T) MALLOC(sizeof(*new));
+ new->cigar_tokens = cigar_tokens;
+ new->intronp = intronp;
+ }
+
new->pairarray = pairarray;
new->pairarray_freeable_p = true;
new->chimera_left_p = false;
@@ -4957,9 +5031,6 @@ Stage3_new (struct Pair_T *pairarray, List_T pairs, int npairs, int goodness, in
new->stage2_source = stage2_source;
new->stage2_indexsize = stage2_indexsize;
- start = &(pairarray[0]);
- end = &(pairarray[npairs-1]);
-
new->straintype = straintype;
new->strain = strain;
@@ -5009,6 +5080,7 @@ Stage3_free (T *old) {
if (*old) {
/* Don't free strain. Belongs to altstrain_iit. */
+ Pair_tokens_free(&(*old)->cigar_tokens);
if ((*old)->pairarray_freeable_p == true) {
FREE_OUT((*old)->pairarray);
}
@@ -5050,9 +5122,8 @@ Stage3_translate_cdna (T this, Sequence_T queryaaseq, bool strictp) {
}
void
-Stage3_backtranslate_cdna (T this, bool diagnosticp) {
- Backtranslation_cdna(this->pairarray,this->npairs,this->translation_start,this->translation_end,
- diagnosticp);
+Stage3_backtranslate_cdna (T this) {
+ Backtranslation_cdna(this->pairarray,this->npairs,this->translation_start,this->translation_end);
return;
}
@@ -5169,24 +5240,31 @@ Stage3_fix_cdna_direction (T this, T reference) {
void
Stage3_translate (T this,
#ifdef PMAP
- Sequence_T queryseq, bool diagnosticp,
+ Sequence_T queryseq,
#endif
int querylength, bool fulllengthp,
- int cds_startpos, bool truncatep, bool strictp,
- bool maponlyp) {
+ int cds_startpos, bool truncatep, bool strictp) {
- if (maponlyp == true) {
- this->translation_start = 0;
- this->translation_end = 0;
- this->translation_length = 0;
- } else {
#ifdef PMAP
- Translation_via_cdna(&this->translation_start,&this->translation_end,&this->translation_length,
- &this->relaastart,&this->relaaend,
- this->pairarray,this->npairs,Sequence_fullpointer(queryseq),strictp);
- Backtranslation_cdna(this->pairarray,this->npairs,this->translation_start,this->translation_end,
- diagnosticp);
+ Translation_via_cdna(&this->translation_start,&this->translation_end,&this->translation_length,
+ &this->relaastart,&this->relaaend,
+ this->pairarray,this->npairs,Sequence_fullpointer(queryseq),strictp);
+ Backtranslation_cdna(this->pairarray,this->npairs,this->translation_start,this->translation_end);
#else
+ if (this->cdna_direction < 0) {
+ Translation_via_genomic(&this->translation_start,&this->translation_end,&this->translation_length,
+ &this->relaastart,&this->relaaend,
+ this->pairarray,this->npairs,/*backwardsp*/true,/*revcompp*/true,fulllengthp,
+ cds_startpos,querylength,strictp);
+ } else {
+ Translation_via_genomic(&this->translation_start,&this->translation_end,&this->translation_length,
+ &this->relaastart,&this->relaaend,
+ this->pairarray,this->npairs,/*backwardsp*/false,/*revcompp*/false,fulllengthp,
+ cds_startpos,querylength,strictp);
+ }
+
+ if (truncatep == true) {
+ truncate_fulllength(this,/*translatep*/false,cds_startpos,querylength,strictp);
if (this->cdna_direction < 0) {
Translation_via_genomic(&this->translation_start,&this->translation_end,&this->translation_length,
&this->relaastart,&this->relaaend,
@@ -5198,23 +5276,9 @@ Stage3_translate (T this,
this->pairarray,this->npairs,/*backwardsp*/false,/*revcompp*/false,fulllengthp,
cds_startpos,querylength,strictp);
}
- if (truncatep == true) {
- truncate_fulllength(this,/*translatep*/false,cds_startpos,querylength,strictp);
- if (this->cdna_direction < 0) {
- Translation_via_genomic(&this->translation_start,&this->translation_end,&this->translation_length,
- &this->relaastart,&this->relaaend,
- this->pairarray,this->npairs,/*backwardsp*/true,/*revcompp*/true,fulllengthp,
- cds_startpos,querylength,strictp);
- } else {
- Translation_via_genomic(&this->translation_start,&this->translation_end,&this->translation_length,
- &this->relaastart,&this->relaaend,
- this->pairarray,this->npairs,/*backwardsp*/false,/*revcompp*/false,fulllengthp,
- cds_startpos,querylength,strictp);
- }
- return;
- }
-#endif
}
+#endif
+
return;
}
@@ -5222,34 +5286,36 @@ Stage3_translate (T this,
void
Stage3_translate_chimera (T this, T mate,
#ifdef PMAP
- Sequence_T queryseq, bool diagnosticp,
+ Sequence_T queryseq,
#endif
int querylength, bool fulllengthp,
- int cds_startpos, bool truncatep, bool strictp,
- bool maponlyp) {
+ int cds_startpos, bool truncatep, bool strictp) {
int npairs1, npairs2;
int translation_start, translation_end, translation_length, relaastart, relaaend;
- if (maponlyp == true) {
- this->translation_start = 0;
- this->translation_end = 0;
- this->translation_length = 0;
-
- mate->translation_start = 0;
- mate->translation_end = 0;
- mate->translation_length = 0;
-
- } else {
- npairs1 = this->npairs;
- npairs2 = mate->npairs;
+ npairs1 = this->npairs;
+ npairs2 = mate->npairs;
#ifdef PMAP
- Translation_via_cdna(&translation_start,&translation_end,&translation_length,
- &relaastart,&relaaend,
- this->pairarray,npairs1 + npairs2,Sequence_fullpointer(queryseq),strictp);
- Backtranslation_cdna(this->pairarray,npairs1 + npairs2,translation_start,translation_end,
- diagnosticp);
+ Translation_via_cdna(&translation_start,&translation_end,&translation_length,
+ &relaastart,&relaaend,
+ this->pairarray,npairs1 + npairs2,Sequence_fullpointer(queryseq),strictp);
+ Backtranslation_cdna(this->pairarray,npairs1 + npairs2,translation_start,translation_end);
#else
+ if (this->cdna_direction < 0) {
+ Translation_via_genomic(&translation_start,&translation_end,&translation_length,
+ &relaastart,&relaaend,
+ this->pairarray,npairs1 + npairs2,/*backwardsp*/true,/*revcompp*/true,fulllengthp,
+ cds_startpos,querylength,strictp);
+ } else {
+ Translation_via_genomic(&translation_start,&translation_end,&translation_length,
+ &relaastart,&relaaend,
+ this->pairarray,npairs1 + npairs2,/*backwardsp*/false,/*revcompp*/false,fulllengthp,
+ cds_startpos,querylength,strictp);
+ }
+
+ if (truncatep == true) {
+ truncate_fulllength(this,/*translatep*/false,cds_startpos,querylength,strictp);
if (this->cdna_direction < 0) {
Translation_via_genomic(&translation_start,&translation_end,&translation_length,
&relaastart,&relaaend,
@@ -5261,70 +5327,54 @@ Stage3_translate_chimera (T this, T mate,
this->pairarray,npairs1 + npairs2,/*backwardsp*/false,/*revcompp*/false,fulllengthp,
cds_startpos,querylength,strictp);
}
-
- if (truncatep == true) {
- truncate_fulllength(this,/*translatep*/false,cds_startpos,querylength,strictp);
- if (this->cdna_direction < 0) {
- Translation_via_genomic(&translation_start,&translation_end,&translation_length,
- &relaastart,&relaaend,
- this->pairarray,npairs1 + npairs2,/*backwardsp*/true,/*revcompp*/true,fulllengthp,
- cds_startpos,querylength,strictp);
- } else {
- Translation_via_genomic(&translation_start,&translation_end,&translation_length,
- &relaastart,&relaaend,
- this->pairarray,npairs1 + npairs2,/*backwardsp*/false,/*revcompp*/false,fulllengthp,
- cds_startpos,querylength,strictp);
- }
- }
+ }
#endif
- if (translation_start < npairs1) {
- this->translation_start = translation_start;
- mate->translation_start = 0;
- } else {
- this->translation_start = npairs1 - 1;
- mate->translation_start = translation_start - npairs1;
- }
- if (translation_end < npairs1) {
- this->translation_end = translation_end;
- mate->translation_end = 0;
- } else {
- this->translation_end = npairs1 - 1;
- mate->translation_end = translation_end - npairs1;
- }
-
- /* Additional checks to stay within array bounds */
- if (this->translation_end >= this->npairs) {
- this->translation_end = this->npairs - 1;
- }
- if (this->translation_start > this->translation_end) {
- this->translation_start = this->translation_end;
- }
+ if (translation_start < npairs1) {
+ this->translation_start = translation_start;
+ mate->translation_start = 0;
+ } else {
+ this->translation_start = npairs1 - 1;
+ mate->translation_start = translation_start - npairs1;
+ }
+ if (translation_end < npairs1) {
+ this->translation_end = translation_end;
+ mate->translation_end = 0;
+ } else {
+ this->translation_end = npairs1 - 1;
+ mate->translation_end = translation_end - npairs1;
+ }
- if (mate->translation_end >= mate->npairs) {
- mate->translation_end = mate->npairs - 1;
- }
- if (mate->translation_start > mate->translation_end) {
- mate->translation_start = mate->translation_end;
- }
+ /* Additional checks to stay within array bounds */
+ if (this->translation_end >= this->npairs) {
+ this->translation_end = this->npairs - 1;
+ }
+ if (this->translation_start > this->translation_end) {
+ this->translation_start = this->translation_end;
+ }
- debug(printf("Converted translation %d..%d in %d+%d pairs to %d..%d and %d..%d\n",
- translation_start,translation_end,this->npairs,mate->npairs,
- this->translation_start,this->translation_end,mate->translation_start,mate->translation_end));
+ if (mate->translation_end >= mate->npairs) {
+ mate->translation_end = mate->npairs - 1;
+ }
+ if (mate->translation_start > mate->translation_end) {
+ mate->translation_start = mate->translation_end;
+ }
- this->translation_length = Pair_translation_length(this->pairarray,this->npairs);
- mate->translation_length = Pair_translation_length(mate->pairarray,mate->npairs);
- debug(printf("Original translation length %d => %d plus %d\n",
- translation_length,this->translation_length,mate->translation_length));
+ debug(printf("Converted translation %d..%d in %d+%d pairs to %d..%d and %d..%d\n",
+ translation_start,translation_end,this->npairs,mate->npairs,
+ this->translation_start,this->translation_end,mate->translation_start,mate->translation_end));
- this->relaastart = this->pairarray[this->translation_start].aapos;
- this->relaaend = this->pairarray[this->translation_end].aapos;
+ this->translation_length = Pair_translation_length(this->pairarray,this->npairs);
+ mate->translation_length = Pair_translation_length(mate->pairarray,mate->npairs);
+ debug(printf("Original translation length %d => %d plus %d\n",
+ translation_length,this->translation_length,mate->translation_length));
- mate->relaastart = mate->pairarray[mate->translation_start].aapos;
- mate->relaaend = mate->pairarray[mate->translation_end].aapos;
+ this->relaastart = this->pairarray[this->translation_start].aapos;
+ this->relaaend = this->pairarray[this->translation_end].aapos;
- }
+ mate->relaastart = mate->pairarray[mate->translation_start].aapos;
+ mate->relaaend = mate->pairarray[mate->translation_end].aapos;
return;
}
@@ -5332,9 +5382,9 @@ Stage3_translate_chimera (T this, T mate,
void
-Stage3_print_pathsummary (FILE *fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Univ_IIT_T contig_iit,
+Stage3_print_pathsummary (Filestring_T fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Univ_IIT_T contig_iit,
IIT_T altstrain_iit, Sequence_T queryseq,
- char *dbversion, int maxmutations, bool diagnosticp, bool maponlyp) {
+ char *dbversion, int maxmutations) {
Pair_T start, end;
bool referencealignp;
@@ -5349,19 +5399,17 @@ Stage3_print_pathsummary (FILE *fp, T this, int pathnum, Univ_IIT_T chromosome_i
this->qopens,this->qindels,this->topens,this->tindels,this->goodness,
this->watsonp,this->cdna_direction,
this->translation_start,this->translation_end,this->translation_length,
- 0,0,maponlyp,diagnosticp,this->stage2_source,this->stage2_indexsize);
- if (maponlyp == false) {
- Translation_print_comparison(fp,this->pairarray,this->npairs,NULL,0,this->cdna_direction,
- this->relaastart,this->relaaend,maxmutations);
- }
- fprintf(fp,"\n");
+ /*relaastart*/0,/*relaaend*/0,this->stage2_source,this->stage2_indexsize);
+ Translation_print_comparison(fp,this->pairarray,this->npairs,NULL,0,this->cdna_direction,
+ this->relaastart,this->relaaend,maxmutations);
+ FPRINTF(fp,"\n");
return;
}
void
-Stage3_print_pslformat_nt (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T usersegment, Sequence_T queryaaseq) {
+Stage3_print_pslformat_nt (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T usersegment, Sequence_T queryaaseq) {
Pair_T start, end;
start = &(this->pairarray[0]);
@@ -5377,12 +5425,12 @@ Stage3_print_pslformat_nt (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence
#ifdef PMAP
void
-Stage3_print_pslformat_pro (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T usersegment, Sequence_T queryaaseq, bool strictp) {
+Stage3_print_pslformat_pro (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T usersegment, Sequence_T queryaaseq, bool strictp) {
Pair_T start, end;
#if 0
Stage3_translate_cdna(this,queryaaseq,strictp);
- Stage3_backtranslate_cdna(this,/*diagnosticp*/false);
+ Stage3_backtranslate_cdna(this);
#endif
start = &(this->pairarray[0]);
@@ -5398,7 +5446,7 @@ Stage3_print_pslformat_pro (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequenc
void
-Stage3_print_gff3 (FILE *fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
+Stage3_print_gff3 (Filestring_T fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
Sequence_T queryseq, int querylength, Printtype_T printtype, char *sourcename) {
Pair_T start, end;
bool gff_gene_format_p, gff_estmatch_format_p;
@@ -5435,7 +5483,7 @@ Stage3_print_gff3 (FILE *fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Seq
#ifndef PMAP
/* Only for GMAP program */
void
-Stage3_print_sam (FILE *fp, char *abbrev, T this, int pathnum, int npaths,
+Stage3_print_sam (Filestring_T fp, char *abbrev, T this, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Sequence_T usersegment,
Sequence_T queryseq, int chimera_part, Chimera_T chimera,
@@ -5454,34 +5502,34 @@ Stage3_print_sam (FILE *fp, char *abbrev, T this, int pathnum, int npaths,
}
if (this->circularpos > 0) {
- Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,
+ Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,this->cigar_tokens,this->intronp,
Sequence_accession(queryseq),/*acc2*/NULL,this->chrnum,chromosome_iit,usersegment,
Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
/*clipdir*/0,/*hardclip5*/0,/*hardclip3*/querylength-this->circularpos,querylength,
- this->watsonp,this->cdna_direction,chimera_part,chimera,
+ this->watsonp,this->sensedir,chimera_part,chimera,
quality_shift,Sequence_firstp(queryseq),
pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,this->chrlength,
mapq_score,sam_paired_p,sam_read_group_id,/*invertp*/false,
- /*circularp*/true,/*merged_overlap_p*/false);
- Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,
+ /*circularp*/true,/*merged_overlap_p*/false,/*sarrayp*/false);
+ Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,this->cigar_tokens,this->intronp,
Sequence_accession(queryseq),/*acc2*/NULL,this->chrnum,chromosome_iit,usersegment,
Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
/*clipdir*/0,/*hardclip5*/this->circularpos,/*hardclip3*/0,querylength,
- this->watsonp,this->cdna_direction,chimera_part,chimera,
+ this->watsonp,this->sensedir,chimera_part,chimera,
quality_shift,Sequence_firstp(queryseq),
pathnum,npaths,absmq_score,first_absmq,second_absmq,/*chrpos*/1,this->chrlength,
mapq_score,sam_paired_p,sam_read_group_id,/*invertp*/false,
- /*circularp*/true,/*merged_overlap_p*/false);
+ /*circularp*/true,/*merged_overlap_p*/false,/*sarrayp*/false);
} else {
- Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,
+ Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,this->cigar_tokens,this->intronp,
Sequence_accession(queryseq),/*acc2*/NULL,this->chrnum,chromosome_iit,usersegment,
Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
/*clipdir*/0,/*hardclip5*/0,/*hardclip3*/0,querylength,
- this->watsonp,this->cdna_direction,chimera_part,chimera,
+ this->watsonp,this->sensedir,chimera_part,chimera,
quality_shift,Sequence_firstp(queryseq),
pathnum,npaths,absmq_score,first_absmq,second_absmq,chrpos,this->chrlength,
mapq_score,sam_paired_p,sam_read_group_id,/*invertp*/false,
- /*circularp*/false,/*merged_overlap_p*/false);
+ /*circularp*/false,/*merged_overlap_p*/false,/*sarrayp*/false);
}
return;
@@ -5491,7 +5539,7 @@ Stage3_print_sam (FILE *fp, char *abbrev, T this, int pathnum, int npaths,
void
-Stage3_print_iit_map (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq) {
+Stage3_print_iit_map (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq) {
Pair_T start, end;
start = &(this->pairarray[0]);
@@ -5503,7 +5551,7 @@ Stage3_print_iit_map (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T qu
}
void
-Stage3_print_iit_exon_map (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq) {
+Stage3_print_iit_exon_map (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq) {
Pair_T start, end;
start = &(this->pairarray[0]);
@@ -5515,7 +5563,7 @@ Stage3_print_iit_exon_map (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence
}
void
-Stage3_print_splicesites (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq) {
+Stage3_print_splicesites (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq) {
Pair_print_splicesites(fp,this->pairarray,this->npairs,Sequence_accession(queryseq),
Pair_nexons(this->pairarray,this->npairs),this->chrnum,
chromosome_iit,this->watsonp);
@@ -5523,7 +5571,7 @@ Stage3_print_splicesites (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_
}
void
-Stage3_print_introns (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq) {
+Stage3_print_introns (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq) {
Pair_print_introns(fp,this->pairarray,this->npairs,Sequence_accession(queryseq),
Pair_nexons(this->pairarray,this->npairs),this->chrnum,
chromosome_iit);
@@ -5533,8 +5581,8 @@ Stage3_print_introns (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T qu
void
-Stage3_print_mutations (FILE *fp, T this, T reference, Univ_IIT_T chromosome_iit, Sequence_T queryseq,
- char *dbversion, bool showalignp, bool diagnosticp,
+Stage3_print_mutations (Filestring_T fp, T this, T reference, Univ_IIT_T chromosome_iit, Sequence_T queryseq,
+ char *dbversion, bool showalignp,
int invertmode, bool nointronlenp, int wraplength,
int maxmutations) {
Pair_T start, end;
@@ -5552,16 +5600,15 @@ Stage3_print_mutations (FILE *fp, T this, T reference, Univ_IIT_T chromosome_iit
Sequence_trim_start(queryseq),Sequence_trim_end(queryseq),
Pair_nexons(this->pairarray,this->npairs),this->matches,this->unknowns,this->mismatches,
this->qopens,this->qindels,this->topens,this->tindels,this->goodness,
- this->watsonp,this->cdna_direction,
- 0,0,0,this->relaastart,this->relaaend,/*maponlyp*/false,
- diagnosticp,this->stage2_source,this->stage2_indexsize);
+ this->watsonp,this->cdna_direction,0,0,0,this->relaastart,this->relaaend,
+ this->stage2_source,this->stage2_indexsize);
Translation_print_comparison(fp,this->pairarray,this->npairs,reference->pairarray,reference->npairs,
this->cdna_direction,this->relaastart,this->relaaend,maxmutations);
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
if (showalignp == true) {
Pair_print_alignment(fp,this->pairarray,this->npairs,reference->chrnum,reference->chroffset,
- chromosome_iit,this->watsonp,diagnosticp,invertmode,nointronlenp,wraplength);
+ chromosome_iit,this->watsonp,invertmode,nointronlenp,wraplength);
}
debug1(Pair_dump_array(this->pairarray,this->npairs,/*zerobasedp*/true));
debug1(Pair_check_array(this->pairarray,this->npairs));
@@ -5572,7 +5619,7 @@ Stage3_print_mutations (FILE *fp, T this, T reference, Univ_IIT_T chromosome_iit
static void
-print_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
+print_map (Filestring_T fp, T this, IIT_T map_iit, int *map_divint_crosstable,
Univ_IIT_T chromosome_iit, int pathnum, bool map_bothstrands_p,
int nflanking, bool print_comment_p) {
int chrlow, chrhigh;
@@ -5583,7 +5630,7 @@ print_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
char *chr;
if ((divno = map_divint_crosstable[this->chrnum]) <= 0) {
- fprintf(fp," *Map hits for path %d (0):\n\n",pathnum);
+ FPRINTF(fp," *Map hits for path %d (0):\n\n",pathnum);
return;
} else {
chr = Chrnum_to_string(this->chrnum,chromosome_iit);
@@ -5610,19 +5657,19 @@ print_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
divno,chrlow,chrhigh,nflanking,/*sign*/0);
}
if (nflanking > 0) {
- fprintf(fp," Map hits for path %d (%d|%d|%d):\n",pathnum,nleftflanks,nmatches,nrightflanks);
+ FPRINTF(fp," Map hits for path %d (%d|%d|%d):\n",pathnum,nleftflanks,nmatches,nrightflanks);
} else {
- fprintf(fp," Map hits for path %d (%d):\n",pathnum,nmatches);
+ FPRINTF(fp," Map hits for path %d (%d):\n",pathnum,nmatches);
}
if (nflanking > 0) {
IIT_print_header(fp,map_iit,leftflanks,nleftflanks,/*bothstrandsp*/true,chr,
/*reversep*/true,/*relativep*/false,/*left*/0U,print_comment_p);
- fprintf(fp," ====================\n");
+ FPRINTF(fp," ====================\n");
}
IIT_print_header(fp,map_iit,iit_matches,nmatches,/*bothstrandsp*/true,chr,
/*reversep*/false,/*relativep*/false,/*left*/0U,print_comment_p);
if (nflanking > 0) {
- fprintf(fp," ====================\n");
+ FPRINTF(fp," ====================\n");
IIT_print_header(fp,map_iit,rightflanks,nrightflanks,/*bothstrandsp*/true,chr,
/*reversep*/false,/*relativep*/false,/*left*/0U,print_comment_p);
}
@@ -5634,24 +5681,24 @@ print_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
divno,chrlow,chrhigh,nflanking,sign);
}
if (nflanking > 0) {
- fprintf(fp," Map hits for path %d (%d|%d|%d):\n",pathnum,nleftflanks,nmatches,nrightflanks);
+ FPRINTF(fp," Map hits for path %d (%d|%d|%d):\n",pathnum,nleftflanks,nmatches,nrightflanks);
} else {
- fprintf(fp," Map hits for path %d (%d):\n",pathnum,nmatches);
+ FPRINTF(fp," Map hits for path %d (%d):\n",pathnum,nmatches);
}
if (nflanking > 0) {
IIT_print_header(fp,map_iit,leftflanks,nleftflanks,/*bothstrandsp*/false,chr,
/*reversep*/true,/*relativep*/false,/*left*/0U,print_comment_p);
- fprintf(fp," ====================\n");
+ FPRINTF(fp," ====================\n");
}
IIT_print_header(fp,map_iit,iit_matches,nmatches,/*bothstrandsp*/false,chr,
/*reversep*/false,/*relativep*/false,/*left*/0U,print_comment_p);
if (nflanking > 0) {
- fprintf(fp," ====================\n");
+ FPRINTF(fp," ====================\n");
IIT_print_header(fp,map_iit,rightflanks,nrightflanks,/*bothstrandsp*/false,chr,
/*reversep*/false,/*relativep*/false,/*left*/0U,print_comment_p);
}
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
if (nflanking > 0) {
FREE(rightflanks);
@@ -5666,7 +5713,7 @@ print_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
/* Doesn't handle nflanking */
static void
-print_exon_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
+print_exon_map (Filestring_T fp, T this, IIT_T map_iit, int *map_divint_crosstable,
Univ_IIT_T chromosome_iit, int pathnum, bool map_bothstrands_p, bool print_comment_p) {
Uintlist_T exonbounds;
Chrpos_T position1, position2;
@@ -5675,7 +5722,7 @@ print_exon_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
char *chr;
if ((divno = map_divint_crosstable[this->chrnum]) <= 0) {
- fprintf(fp," *Map hits for path %d (0):\n\n",pathnum);
+ FPRINTF(fp," *Map hits for path %d (0):\n\n",pathnum);
return;
} else {
chr = Chrnum_to_string(this->chrnum,chromosome_iit);
@@ -5693,7 +5740,7 @@ print_exon_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
} else {
iit_matches = IIT_get(&nmatches,map_iit,chr,position2,position1,/*sortp*/true);
}
- fprintf(fp," Map hits for path %d, exon %d (%d):\n",pathnum,++exonno,nmatches);
+ FPRINTF(fp," Map hits for path %d, exon %d (%d):\n",pathnum,++exonno,nmatches);
IIT_print_header(fp,map_iit,iit_matches,nmatches,/*bothstrandsp*/true,chr,
/*reversep*/false,/*relativep*/false,/*left*/0U,print_comment_p);
@@ -5705,11 +5752,11 @@ print_exon_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
iit_matches = IIT_get_signed_with_divno(&nmatches,map_iit,divno,position2,position1,
/*sortp*/true,/*sign*/-1);
}
- fprintf(fp," Map hits for path %d, exon %d (%d):\n",pathnum,++exonno,nmatches);
+ FPRINTF(fp," Map hits for path %d, exon %d (%d):\n",pathnum,++exonno,nmatches);
IIT_print_header(fp,map_iit,iit_matches,nmatches,/*bothstrandsp*/false,chr,
/*reversep*/false,/*relativep*/false,/*left*/0U,print_comment_p);
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
FREE(iit_matches);
}
@@ -5717,7 +5764,7 @@ print_exon_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable,
}
void
-Stage3_print_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable, Univ_IIT_T chromosome_iit,
+Stage3_print_map (Filestring_T fp, T this, IIT_T map_iit, int *map_divint_crosstable, Univ_IIT_T chromosome_iit,
int pathnum, bool map_exons_p, bool map_bothstrands_p, int nflanking,
bool print_comment_p) {
if (map_exons_p == true) {
@@ -5734,35 +5781,25 @@ Stage3_print_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable, U
/* queryaaseq is used only by PMAP */
void
-Stage3_print_alignment (FILE *fp, T this, Genome_T genome,
+Stage3_print_alignment (Filestring_T fp, T this, Genome_T genome,
Univ_IIT_T chromosome_iit, Printtype_T printtype,
- bool continuousp, bool continuous_by_exon_p, bool diagnosticp, bool genomefirstp,
+ bool continuousp, bool continuous_by_exon_p, bool genomefirstp,
int invertmode, bool nointronlenp, int wraplength) {
if (continuous_by_exon_p == true) {
Pair_print_exonsummary(fp,this->pairarray,this->npairs,this->chrnum,this->chroffset,
genome,chromosome_iit,this->watsonp,this->cdna_direction,genomefirstp,invertmode);
- Pair_print_continuous_byexon(fp,this->pairarray,this->npairs,this->watsonp,diagnosticp,invertmode);
+ Pair_print_continuous_byexon(fp,this->pairarray,this->npairs,this->watsonp,invertmode);
} else if (continuousp == true) {
-
-#if 0
- if (maponlyp == false) {
-#ifdef PMAP
- Stage3_translate_cdna(this,queryaaseq,strictp);
- Stage3_backtranslate_cdna(this,diagnosticp);
-#endif
- }
-#endif
-
Pair_print_continuous(fp,this->pairarray,this->npairs,this->watsonp,
- diagnosticp,genomefirstp,invertmode,nointronlenp);
+ genomefirstp,invertmode,nointronlenp);
} else {
/* Assumes Stage3_print_pathsummary already called */
Pair_print_exonsummary(fp,this->pairarray,this->npairs,this->chrnum,this->chroffset,
genome,chromosome_iit,this->watsonp,this->cdna_direction,genomefirstp,invertmode);
if (printtype == ALIGNMENT) {
Pair_print_alignment(fp,this->pairarray,this->npairs,this->chrnum,this->chroffset,
- chromosome_iit,this->watsonp,diagnosticp,invertmode,nointronlenp,wraplength);
+ chromosome_iit,this->watsonp,invertmode,nointronlenp,wraplength);
}
}
debug1(Pair_dump_array(this->pairarray,this->npairs,/*zerobasedp*/true));
@@ -5772,7 +5809,7 @@ Stage3_print_alignment (FILE *fp, T this, Genome_T genome,
void
-Stage3_print_coordinates (FILE *fp, T this, Univ_IIT_T chromosome_iit, int invertmode) {
+Stage3_print_coordinates (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, int invertmode) {
Pair_print_coordinates(fp,this->pairarray,this->npairs,this->chrnum,this->chroffset,
chromosome_iit,this->watsonp,invertmode);
return;
@@ -5780,7 +5817,7 @@ Stage3_print_coordinates (FILE *fp, T this, Univ_IIT_T chromosome_iit, int inver
void
-Stage3_print_cdna (FILE *fp, T this, int wraplength) {
+Stage3_print_cdna (Filestring_T fp, T this, int wraplength) {
#ifdef PMAP
Pair_print_nucleotide_cdna(fp,this->pairarray,this->npairs,wraplength);
#else
@@ -5794,7 +5831,7 @@ Stage3_print_cdna (FILE *fp, T this, int wraplength) {
}
void
-Stage3_print_protein_genomic (FILE *fp, T this, int wraplength) {
+Stage3_print_protein_genomic (Filestring_T fp, T this, int wraplength) {
if (this->cdna_direction >= 0) {
Pair_print_protein_genomic(fp,this->pairarray,this->npairs,wraplength,/*forwardp*/true);
} else {
@@ -5805,7 +5842,7 @@ Stage3_print_protein_genomic (FILE *fp, T this, int wraplength) {
void
-Stage3_print_compressed (FILE *fp, T this, Sequence_T queryseq, Univ_IIT_T chromosome_iit,
+Stage3_print_compressed (Filestring_T fp, T this, Sequence_T queryseq, Univ_IIT_T chromosome_iit,
char *dbversion, Sequence_T usersegment, int pathnum, int npaths,
bool checksump, int chimerapos, int chimeraequivpos,
double donor_prob, double acceptor_prob, int chimera_cdna_direction) {
@@ -5814,7 +5851,7 @@ Stage3_print_compressed (FILE *fp, T this, Sequence_T queryseq, Univ_IIT_T chrom
#if 0
#ifdef PMAP
Stage3_translate_cdna(this,queryseq,strictp);
- Stage3_backtranslate_cdna(this,/*diagnosticp*/false);
+ Stage3_backtranslate_cdna(this);
#else
if (truncatep == true) {
truncate_fulllength(this,/*translatep*/true,/*cds_startpos*/-1,
@@ -5876,7 +5913,9 @@ compute_introntype (char left1, char left2, char right2, char right1) {
}
#endif
+#if 0
static char uppercaseCode[128] = UPPERCASE_U2T;
+#endif
#if 0
static List_T
@@ -6199,13 +6238,11 @@ peel_leftward_old (bool *mismatchp, List_T *peeled_path, List_T path, int *query
static List_T
-peel_leftward (int *n_peeled_indels, bool *protectedp, List_T *peeled_path, List_T path, int *querydp5, int *genomedp5,
+peel_leftward (int *n_peeled_indels, bool *protectedp, List_T *peeled_path, List_T path, int *querydp5, Chrpos_T *genomedp5,
int maxpeelback, bool stop_at_indels_p) {
- List_T peeled = NULL, rest = NULL, pairptr;
- Pair_T pair, nextpair, rightpair;
- int npeelback = 0, nconsecutive = 0, init_dynprogindex = DYNPROGINDEX_MINOR, niter;
- bool stopp;
- int nmatches;
+ List_T peeled = NULL;
+ Pair_T pair, rightpair;
+ int npeelback = 0, niter;
#if 0
int nincursion = 0;
#endif
@@ -6663,13 +6700,11 @@ peel_rightward_old (bool *mismatchp, List_T *peeled_pairs, List_T pairs, int *qu
static List_T
-peel_rightward (int *n_peeled_indels, bool *protectedp, List_T *peeled_pairs, List_T pairs, int *querydp3, int *genomedp3,
+peel_rightward (int *n_peeled_indels, bool *protectedp, List_T *peeled_pairs, List_T pairs, int *querydp3, Chrpos_T *genomedp3,
int maxpeelback, bool stop_at_indels_p) {
- List_T peeled = NULL, rest = NULL, pairptr;
- Pair_T pair, nextpair, leftpair;
- int npeelback = 0, nconsecutive = 0, init_dynprogindex = DYNPROGINDEX_MINOR, niter;
- bool stopp;
- int nmatches;
+ List_T peeled = NULL;
+ Pair_T pair, leftpair;
+ int npeelback = 0, niter;
#if 0
int incursion = 0;
#endif
@@ -6825,17 +6860,16 @@ traverse_single_gap (bool *filledp, int *dynprogindex, List_T pairs, List_T *pat
char *queryseq_ptr, char *queryuc_ptr, int querylength,
bool watsonp, bool jump_late_p, Pairpool_T pairpool, Dynprog_T dynprog,
Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
- int maxpeelback, int extraband_single, double defect_rate, int close_indels_mode,
- bool forcep, bool finalp) {
+ int maxpeelback, double defect_rate, bool forcep, bool finalp) {
List_T gappairs, peeled_pairs, peeled_path;
int queryjump, genomejump;
- int querydp5, genomedp5, querydp3, genomedp3;
+ int querydp5, querydp3;
+ Chrpos_T genomedp5, genomedp3;
int nmatches, nmismatches, nopens, nindels;
int unknowns, qopens, qindels, topens, tindels, ncanonical, nsemicanonical, nnoncanonical;
int finalscore, origscore;
- bool mismatchp = false, protectedp;
+ bool protectedp;
int n_peeled_indels;
- Pair_T gappair;
double min_splice_prob;
/* int origqueryjump, origgenomejump; */
@@ -6983,16 +7017,15 @@ traverse_cdna_gap (bool *filledp, bool *incompletep, int *dynprogindex_minor, in
int cdna_direction, bool watsonp, bool jump_late_p, Pairpool_T pairpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
- int maxpeelback, int extramaterial_paired, int extraband_paired, int extraband_single,
- double defect_rate, int close_indels_mode, bool finalp) {
+ int maxpeelback, double defect_rate, bool finalp) {
List_T gappairs, peeled_pairs = NULL, peeled_path = NULL;
int queryjump, genomejump;
- int querydp5, genomedp5, querydp3, genomedp3;
+ int querydp5, querydp3;
+ Chrpos_T genomedp5, genomedp3;
int finalscore;
int nmatches, nmismatches, nopens, nindels;
- bool mismatchp = false, throughmismatchp, protectedp;
+ bool protectedp;
int n_peeled_indels;
- Pair_T gappair;
debug(printf("\nTRAVERSE_CDNA_GAP\n"));
querydp5 = leftpair->querypos + 1;
@@ -7002,13 +7035,15 @@ traverse_cdna_gap (bool *filledp, bool *incompletep, int *dynprogindex_minor, in
querydp3 = rightpair->querypos - 1;
genomedp3 = rightpair->genomepos - 1;
+#if 0
if (leftpair->dynprogindex < 0 && leftpair->dynprogindex == rightpair->dynprogindex) {
debug(printf("Re-peeling prior solution\n"));
- throughmismatchp = false;
+ /* throughmismatchp = false; */
} else {
debug(printf("No prior solution\n"));
- throughmismatchp = true;
+ /* throughmismatchp = true; */
}
+#endif
protectedp = false;
pairs = peel_rightward(&n_peeled_indels,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
@@ -7105,18 +7140,16 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
int cdna_direction, bool watsonp, bool jump_late_p,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
- int maxpeelback, int extramaterial_paired, int extraband_paired, int extraband_single,
- double defect_rate, int close_indels_mode, bool finalp, bool simplep) {
- List_T gappairs, gappairs_alt, peeled_pairs = NULL, peeled_path = NULL, p;
+ int maxpeelback, double defect_rate, bool finalp, bool simplep) {
+ List_T gappairs, peeled_pairs = NULL, peeled_path = NULL, p;
Pair_T pair;
int queryjump, genomejump;
- int querydp5, genomedp5, querydp3, genomedp3;
+ int querydp5, querydp3;
+ Chrpos_T genomedp5, genomedp3;
int new_leftgenomepos, new_rightgenomepos;
- double left_prob, right_prob, left_prob_alt, right_prob_alt;
- int finalscore, finalscore_alt, nmatches, nmismatches, nopens, nindels, exonhead, introntype;
- int nmismatches_alt;
+ double left_prob, right_prob;
+ int finalscore, nmatches, nmismatches, nopens, nindels, exonhead, introntype;
int acceptable_nmismatches;
- bool mismatch_rightward_p = false, mismatch_leftward_p = false, throughmismatchp;
bool stop_at_indels_p, protectedp;
int n_peeled_indels_rightward, n_peeled_indels_leftward;
double prob2, prob3;
@@ -7128,7 +7161,6 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
#ifdef SHORTCUT
char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
#endif
- Pair_T gappair;
debug(printf("\nTRAVERSE_GENOME_GAP\n"));
@@ -7141,13 +7173,15 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
querydp3 = rightpair->querypos - 1;
genomedp3 = rightpair->genomepos - 1;
+#if 0
if (leftpair->dynprogindex < 0 && leftpair->dynprogindex == rightpair->dynprogindex) {
debug(printf("Re-peeling prior solution\n"));
- throughmismatchp = false;
+ /* throughmismatchp = false; */
} else {
debug(printf("No prior solution\n"));
- throughmismatchp = true;
+ /* throughmismatchp = true; */
}
+#endif
#ifdef SHORTCUT
queryjump = querydp3 - querydp5 + 1;
@@ -7471,12 +7505,12 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
char *queryseq_ptr, char *queryuc_ptr, int querylength, int cdna_direction, bool watsonp,
bool jump_late_p, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogR,
Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
- int maxpeelback, int nullgap, int extramaterial_paired, int extraband_paired,
- double defect_rate, bool simplep, bool finalp) {
+ int maxpeelback, double defect_rate, bool finalp) {
List_T single_gappairs, dual_gappairs_1 = NULL, dual_gappairs_2 = NULL,
right_gappairs = NULL, left_gappairs = NULL, peeled_pairs, peeled_path;
int queryjump, genomejump;
- int querydp5, genomedp5, querydp3, genomedp3;
+ int querydp5, querydp3;
+ Chrpos_T genomedp5, genomedp3;
int new_leftgenomepos, new_rightgenomepos;
double single_left_prob, single_right_prob, dual_left_prob_1, dual_right_prob_1, dual_left_prob_2, dual_right_prob_2;
int querydp5_dual, querydp3_dual, genomedp5_dual, genomedp3_dual;
@@ -7487,15 +7521,14 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
nmismatches, nopens, nindels, exonhead, right_exonhead, left_exonhead;
int left_score, right_score, left_goodness = 0, right_goodness = 0;
int middle_exonlength, interexon_region;
- int single_introntype, dual_introntype_1, dual_introntype_2, left_introntype, right_introntype,
- canonical_introntype, semicanonical_introntype_1, semicanonical_introntype_2;
+ int single_introntype, dual_introntype_1, dual_introntype_2, left_introntype, right_introntype;
double middle_exonprob;
- bool singlep = false, mismatchp = false, single_canonical_p, dual_canonical_p, protectedp;
+ bool singlep = false, single_canonical_p, dual_canonical_p, protectedp;
int n_peeled_indels;
- Pair_T gappair;
debug(printf("\nTRAVERSE_DUAL_GENOME_GAP: left_end_intron_p %d, right_end_intron_p %d\n",
left_end_intron_p,right_end_intron_p));
+#if 0
if (cdna_direction > 0) {
canonical_introntype = GTAG_FWD;
semicanonical_introntype_1 = ATAC_FWD;
@@ -7507,6 +7540,7 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
semicanonical_introntype_2 = GCAG_REV;
#endif
}
+#endif
querydp5 = leftpair->querypos + 1;
genomedp5 = leftpair->genomepos + 1;
@@ -7860,6 +7894,7 @@ traverse_dual_genome_gap (int *dynprogindex, List_T pairs, List_T *path,
}
+#if 0
static bool
good_end_intron_p (Pair_T gappair, int cdna_direction) {
if (gappair->knowngapp == true) {
@@ -7886,6 +7921,7 @@ good_end_intron_p (Pair_T gappair, int cdna_direction) {
}
}
}
+#endif
/* Note on QUERYEND_INDELS. Profiling shows that using
@@ -7899,29 +7935,28 @@ good_end_intron_p (Pair_T gappair, int cdna_direction) {
static List_T
distalmedial_ending5 (bool *knownsplicep, bool *chop_exon_p, int *dynprogindex_minor,
- int *finalscore, int *ambig_end_length, Splicetype_T *ambig_splicetype, double *ambig_prob,
- List_T *pairs, int leftquerypos, int leftgenomepos, Pair_T rightpair,
+ int *finalscore, int *ambig_end_length, double *ambig_prob,
+ List_T *pairs, int leftquerypos, Pair_T rightpair,
Univcoord_T chroffset, Univcoord_T chrhigh,
- Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p, Pairpool_T pairpool,
- Dynprog_T dynprog, int maxpeelback, int extramaterial_end,
- int extraband_end, double defect_rate) {
- List_T peeled_pairs, endgappairs, continuous_gappairs_medialgap = NULL;
+ Dynprog_T dynprog, int maxpeelback, double defect_rate) {
+ List_T peeled_pairs, continuous_gappairs_medialgap = NULL;
int queryjump, genomejump;
- int querydp5, genomedp5, querydp3_distalgap, genomedp3_distalgap, querydp3_medialgap, genomedp3_medialgap;
+ int querydp5, querydp3_distalgap, querydp3_medialgap;
+ Chrpos_T genomedp3_distalgap, genomedp3_medialgap;
int continuous_goodness_distalgap = 0, continuous_goodness_medialgap = 0,
nmatches, nmismatches, nopens, nindels;
- bool mismatchp = false, protectedp;
+ bool protectedp;
int n_peeled_indels;
- Pair_T gappair;
bool knownsplice_medial_p = false;
- Endalign_T endalign;
debug(printf("\nDISTALMEDIAL_ENDING5\n"));
querydp5 = leftquerypos + 1;
+#if 0
genomedp5 = leftgenomepos + 1; /* 0 */
+#endif
querydp3_distalgap = querydp3_medialgap = rightpair->querypos - 1;
genomedp3_distalgap = genomedp3_medialgap = rightpair->genomepos - 1;
@@ -7930,121 +7965,72 @@ distalmedial_ending5 (bool *knownsplicep, bool *chop_exon_p, int *dynprogindex_m
protectedp = false;
*pairs = peel_rightward(&n_peeled_indels,&protectedp,&peeled_pairs,*pairs,&querydp3_distalgap,&genomedp3_distalgap,
maxpeelback,/*stop_at_indels_p*/true);
- if (0 && endgappairs == NULL) {
- *chop_exon_p = false;
- return peeled_pairs;
- } else {
+ continuous_goodness_distalgap = Pair_fracidentity_score(peeled_pairs,cdna_direction);
+ /* continuous_goodness_distalgap += Pair_fracidentity_score(endgappairs,cdna_direction); */
+ debug(printf("continuous_goodness_distalgap (%d pairs) is %d\n",
+ List_length(peeled_pairs),continuous_goodness_distalgap));
+
#if 0
- continuous_goodness_distalgap = Pair_fracidentity_max(&changepoint,peeled_pairs,cdna_direction);
-#else
- continuous_goodness_distalgap = Pair_fracidentity_score(peeled_pairs,cdna_direction);
- /* continuous_goodness_distalgap += Pair_fracidentity_score(endgappairs,cdna_direction); */
+ /* gappair wasn't initialized */
+ if (good_end_intron_p(gappair,cdna_direction) == false) {
+ debug(printf("Subtracting points from continuous distal because noncanonical\n"));
+ continuous_goodness_distalgap -= CANONICAL_POINTS;
+ } else if (gappair->comp == DUALBREAK_COMP) {
+ debug(printf("Subtracting points from continuous distal because of dual break\n"));
+ continuous_goodness_distalgap -= (CANONICAL_POINTS + CANONICAL_POINTS);
+ }
#endif
- debug(printf("continuous_goodness_distalgap (%d+%d pairs) is %d, with gapcomp '%c' with probs %f and %f\n",
- List_length(peeled_pairs),List_length(endgappairs),continuous_goodness_distalgap,
- gappair->comp,gappair->donor_prob,gappair->acceptor_prob));
- if (good_end_intron_p(gappair,cdna_direction) == false) {
- debug(printf("Subtracting points from continuous distal because noncanonical\n"));
- continuous_goodness_distalgap -= CANONICAL_POINTS;
- } else if (gappair->comp == DUALBREAK_COMP) {
- debug(printf("Subtracting points from continuous distal because of dual break\n"));
- continuous_goodness_distalgap -= (CANONICAL_POINTS + CANONICAL_POINTS);
- }
-
- /* Solve if gap were not present */
- queryjump = querydp3_medialgap - querydp5 + 1;
- genomejump = queryjump + extramaterial_end; /* proposed */
- /* Previously, we limited genomejump = min(2*queryjump,queryjump+extramaterial_end) */
+ /* Solve if gap were not present */
+ queryjump = querydp3_medialgap - querydp5 + 1;
+ genomejump = queryjump + extramaterial_end; /* proposed */
+ /* Previously, we limited genomejump = min(2*queryjump,queryjump+extramaterial_end) */
- genomedp5 = genomedp3_medialgap - genomejump + 1;
#ifdef EXTRACT_GENOMICSEG
- /* Make sure we don't go past the beginning */
- if (genomedp5 < 0) {
- genomedp5 = 0;
- genomejump = genomedp3_medialgap - genomedp5 + 1;
- }
+ genomedp5 = genomedp3_medialgap - genomejump + 1;
+ /* Make sure we don't go past the beginning */
+ if (genomedp5 < 0) {
+ genomedp5 = 0;
+ genomejump = genomedp3_medialgap - genomedp5 + 1;
+ }
#endif
- debug(printf("Stage 3 (dir %d): traverse_ending5: Dynamic programming at 5' end (medial to gap): querydp5 = %d, querydp3 = %d, genomedp5 = %d, genomedp3 = %d\n",
- cdna_direction,querydp5,querydp3_medialgap,genomedp5,genomedp3_medialgap));
-
- if (0 /* genomedp3_medialgap > genomiclength */) {
- debug(printf("Not feasible to do medial gap\n"));
- *ambig_end_length = 0;
- *ambig_prob = 0.0;
-
- *pairs = Pairpool_transfer(*pairs,endgappairs);
- *chop_exon_p = false;
- /* Let previous value of knownsplicep stand */
-
- return peeled_pairs;
-
- } else {
- debug(printf("Before solving the 5' end, here are the pairs:\n"));
- debug(Pair_dump_list(*pairs,true));
- debug(printf("\n"));
-
- if (0 && splicesites != NULL) {
- /* Use only for extend_ending5 */
- continuous_gappairs_medialgap = Dynprog_end5_known(&knownsplice_medial_p,&(*dynprogindex_minor),&(*finalscore),
- &(*ambig_end_length),&(*ambig_splicetype),
- &nmatches,&nmismatches,&nopens,&nindels,dynprog,
- &(queryseq_ptr[querydp3_medialgap]),&(queryuc_ptr[querydp3_medialgap]),
- queryjump,genomejump,querydp3_medialgap,genomedp3_medialgap,
- chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
- cdna_direction,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate);
- if (*ambig_end_length > 0) {
- *ambig_prob = 2.0;
- }
- } else {
- continuous_gappairs_medialgap = Dynprog_end5_gap(&(*dynprogindex_minor),&(*finalscore),
- &nmatches,&nmismatches,&nopens,&nindels,dynprog,
- &(queryseq_ptr[querydp3_medialgap]),&(queryuc_ptr[querydp3_medialgap]),
- queryjump,genomejump,querydp3_medialgap,genomedp3_medialgap,
- chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS);
- *ambig_end_length = 0;
- *ambig_prob = 0.0;
- }
+ debug(printf("Stage 3 (dir %d): traverse_ending5: Dynamic programming at 5' end (medial to gap): querydp5 = %d, querydp3 = %d, genomedp3 = %d\n",
+ cdna_direction,querydp5,querydp3_medialgap,genomedp3_medialgap));
- continuous_goodness_medialgap = nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
- debug(printf("Continuous_goodness_medialgap %d = %d + %d*%d + %d*%d + %d*%d\n",
- continuous_goodness_medialgap,nmatches,MISMATCH,nmismatches,QOPEN,nopens,QINDEL,nindels));
+ continuous_gappairs_medialgap = Dynprog_end5_gap(&(*dynprogindex_minor),&(*finalscore),
+ &nmatches,&nmismatches,&nopens,&nindels,dynprog,
+ &(queryseq_ptr[querydp3_medialgap]),&(queryuc_ptr[querydp3_medialgap]),
+ queryjump,genomejump,querydp3_medialgap,genomedp3_medialgap,
+ chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
+ extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS);
+ *ambig_end_length = 0;
+ *ambig_prob = 0.0;
- if (continuous_goodness_distalgap > continuous_goodness_medialgap) {
- debug(printf("Continuous distal wins: %d > %d\n",continuous_goodness_distalgap,continuous_goodness_medialgap));
- *ambig_end_length = 0;
- *ambig_prob = 0.0;
+ continuous_goodness_medialgap = nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
+ debug(printf("Continuous_goodness_medialgap %d = %d + %d*%d + %d*%d + %d*%d\n",
+ continuous_goodness_medialgap,nmatches,MISMATCH,nmismatches,QOPEN,nopens,QINDEL,nindels));
+
+ if (continuous_goodness_distalgap > continuous_goodness_medialgap) {
+ debug(printf("Continuous distal wins: %d > %d\n",continuous_goodness_distalgap,continuous_goodness_medialgap));
+ *ambig_end_length = 0;
+ *ambig_prob = 0.0;
-#if 0
- debug(printf("Before transferring endgappairs, here is pairs:\n"));
- debug(Pair_dump_list(*pairs,true));
- debug(printf("\n"));
-#endif
+ /* *pairs = Pairpool_transfer(*pairs,endgappairs); */
+ *chop_exon_p = false;
+ /* Let previous value of knownsplicep stand */
+ debug(printf("Returning peeled pairs:\n"));
+ debug(Pair_dump_list(peeled_pairs,true));
+ debug(printf("\n"));
+ return peeled_pairs;
- *pairs = Pairpool_transfer(*pairs,endgappairs);
- *chop_exon_p = false;
- /* Let previous value of knownsplicep stand */
-#if 0
- changepoint = Pair_fracidentity_changepoint(peeled_pairs,cdna_direction);
- return List_truncate(peeled_pairs,changepoint);
-#else
- debug(printf("Returning peeled pairs:\n"));
- debug(Pair_dump_list(peeled_pairs,true));
- debug(printf("\n"));
- return peeled_pairs;
-#endif
- } else {
- debug(printf("Continuous medial wins: %d > %d\n",
- continuous_goodness_medialgap,continuous_goodness_distalgap));
- *chop_exon_p = true;
- *knownsplicep = knownsplice_medial_p;
- return continuous_gappairs_medialgap;
- }
- }
+ } else {
+ debug(printf("Continuous medial wins: %d > %d\n",
+ continuous_goodness_medialgap,continuous_goodness_distalgap));
+ *chop_exon_p = true;
+ *knownsplicep = knownsplice_medial_p;
+ return continuous_gappairs_medialgap;
}
}
@@ -8052,27 +8038,29 @@ distalmedial_ending5 (bool *knownsplicep, bool *chop_exon_p, int *dynprogindex_m
static List_T
extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
int *finalscore, int *ambig_end_length, Splicetype_T *ambig_splicetype, double *ambig_prob,
- List_T *pairs, int leftquerypos, int leftgenomepos, Pair_T rightpair,
+ List_T *pairs, int leftquerypos, Pair_T rightpair,
Univcoord_T chroffset, Univcoord_T chrhigh,
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p, Pairpool_T pairpool,
- Dynprog_T dynprog, int maxpeelback, int extramaterial_end,
- int extraband_end, double defect_rate, Endalign_T endalign) {
+ Dynprog_T dynprog, int maxpeelback, double defect_rate, Endalign_T endalign) {
List_T continuous_gappairs_distalgap = NULL, peeled_pairs;
int queryjump, genomejump;
- int querydp5, genomedp5, querydp3_distalgap, genomedp3_distalgap;
+ int querydp5, querydp3_distalgap;
+ Chrpos_T genomedp3_distalgap;
int nmatches, nmismatches, nopens, nindels;
- bool mismatchp = false, protectedp = false;
+ bool protectedp = false;
int n_peeled_indels = 0;
- Pair_T gappair, firstpair;
+ Pair_T firstpair;
debug(printf("\nEXTEND_ENDING5 with endalign %s and maxpeelback %d\n",
Dynprog_endalign_string(endalign),maxpeelback));
querydp5 = leftquerypos + 1;
+#if 0
genomedp5 = leftgenomepos + 1; /* 0 */
+#endif
querydp3_distalgap = rightpair->querypos - 1;
genomedp3_distalgap = rightpair->genomepos - 1;
@@ -8094,7 +8082,9 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
genomejump = queryjump + extramaterial_end; /* proposed */
/* Previously, we limited genomejump = min(2*queryjump,queryjump+extramaterial_end) */
+#if 0
genomedp5 = genomedp3_distalgap - genomejump + 1;
+#endif
#ifdef EXTRACT_GENOMICSEG
/* Make sure we don't go past the beginning */
if (genomedp5 < 0) {
@@ -8103,8 +8093,8 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
}
#endif
- debug(printf("Stage 3 (dir %d), extend_ending5: Dynamic programming at 5' end (distal to gap): querydp5 = %d, querydp3 = %d, genomedp5 = %d, genomedp3 = %d\n",
- cdna_direction,querydp5,querydp3_distalgap,genomedp5,genomedp3_distalgap));
+ debug(printf("Stage 3 (dir %d), extend_ending5: Dynamic programming at 5' end (distal to gap): querydp5 = %d, querydp3 = %d, genomedp3 = %d\n",
+ cdna_direction,querydp5,querydp3_distalgap,genomedp3_distalgap));
if (endalign == QUERYEND_GAP && splicesites != NULL) {
@@ -8158,24 +8148,21 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
static List_T
distalmedial_ending3 (bool *knownsplicep, bool *chop_exon_p, int *dynprogindex_minor,
- int *finalscore, int *ambig_end_length, Splicetype_T *ambig_splicetype, double *ambig_prob,
- List_T *path, Pair_T leftpair, int rightquerypos, int querylength,
+ int *finalscore, int *ambig_end_length, double *ambig_prob,
+ List_T *path, Pair_T leftpair, int rightquerypos,
Univcoord_T chroffset, Univcoord_T chrhigh,
- Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p,
- Pairpool_T pairpool, Dynprog_T dynprog, int maxpeelback, int extramaterial_end,
- int extraband_end, double defect_rate) {
- List_T peeled_path, endgappairs, continuous_gappairs_medialgap = NULL;
+ Pairpool_T pairpool, Dynprog_T dynprog, int maxpeelback, double defect_rate) {
+ List_T peeled_path, continuous_gappairs_medialgap = NULL;
int queryjump, genomejump;
- int querydp5_distalgap, genomedp5_distalgap, querydp3, querydp5_medialgap, genomedp5_medialgap;
+ int querydp5_distalgap, querydp3, querydp5_medialgap;
+ Chrpos_T genomedp5_distalgap, genomedp5_medialgap;
int continuous_goodness_distalgap = 0, continuous_goodness_medialgap = 0,
nmatches, nmismatches, nopens, nindels;
- bool mismatchp = false, protectedp;
+ bool protectedp;
int n_peeled_indels;
bool knownsplice_medial_p = false;
- Pair_T gappair;
- Endalign_T endalign;
debug(printf("\nDISTALMEDIAL_ENDING3\n"));
@@ -8195,121 +8182,75 @@ distalmedial_ending3 (bool *knownsplicep, bool *chop_exon_p, int *dynprogindex_m
*path = peel_leftward(&n_peeled_indels,&protectedp,&peeled_path,*path,&querydp5_distalgap,&genomedp5_distalgap,
maxpeelback,/*stop_at_indels_p*/true);
- if (0 && endgappairs == NULL) {
- *chop_exon_p = false;
- return peeled_path;
+ continuous_goodness_distalgap = Pair_fracidentity_score(peeled_path,cdna_direction);
+ /* continuous_goodness_distalgap += Pair_fracidentity_score(endgappairs,cdna_direction); */
+ debug(printf("continuous_goodness_distalgap (%d pairs) is %d\n",
+ List_length(peeled_path),continuous_goodness_distalgap));
- } else {
#if 0
- continuous_goodness_distalgap = Pair_fracidentity_max(&changepoint,peeled_path,cdna_direction);
-#else
- continuous_goodness_distalgap = Pair_fracidentity_score(peeled_path,cdna_direction);
- /* continuous_goodness_distalgap += Pair_fracidentity_score(endgappairs,cdna_direction); */
+ /* gappair wasn't initialized */
+ if (good_end_intron_p(gappair,cdna_direction) == false) {
+ debug(printf("Subtracting points from continuous distal because noncanonical\n"));
+ continuous_goodness_distalgap -= CANONICAL_POINTS;
+ } else if (gappair->comp == DUALBREAK_COMP) {
+ debug(printf("Subtracting points from continuous distal because of dual break\n"));
+ continuous_goodness_distalgap -= (CANONICAL_POINTS + CANONICAL_POINTS);
+ }
#endif
- debug(printf("continuous_goodness_distalgap (%d+%d pairs) is %d, with gapcomp '%c', probs %f and %f\n",
- List_length(peeled_path),List_length(endgappairs),continuous_goodness_distalgap,
- gappair->comp,gappair->donor_prob,gappair->acceptor_prob));
- if (good_end_intron_p(gappair,cdna_direction) == false) {
- debug(printf("Subtracting points from continuous distal because noncanonical\n"));
- continuous_goodness_distalgap -= CANONICAL_POINTS;
- } else if (gappair->comp == DUALBREAK_COMP) {
- debug(printf("Subtracting points from continuous distal because of dual break\n"));
- continuous_goodness_distalgap -= (CANONICAL_POINTS + CANONICAL_POINTS);
- }
-
- /* Solve if gap were not present */
- queryjump = querydp3 - querydp5_medialgap + 1;
- genomejump = queryjump + extramaterial_end; /* proposed */
- /* Previously, we limited genomejump = min(2*queryjump,queryjump+extramaterial_end) */
+ /* Solve if gap were not present */
+ queryjump = querydp3 - querydp5_medialgap + 1;
+ genomejump = queryjump + extramaterial_end; /* proposed */
+ /* Previously, we limited genomejump = min(2*queryjump,queryjump+extramaterial_end) */
- /* genomedp3 = genomedp5_medialgap + genomejump - 1; */
#ifdef EXTRACT_GENOMICSEG
- /* Make sure we don't go past the end */
- if (genomedp3 > genomiclength - 1) {
- genomedp3 = genomiclength - 1;
- genomejump = genomedp3 - genomedp5_medialgap + 1;
- }
+ genomedp3 = genomedp5_medialgap + genomejump - 1;
+ /* Make sure we don't go past the end */
+ if (genomedp3 > genomiclength - 1) {
+ genomedp3 = genomiclength - 1;
+ genomejump = genomedp3 - genomedp5_medialgap + 1;
+ }
#endif
- debug(printf("Stage 3 (dir %d): distalmedial_ending3: Dynamic programming at 3' end (medial to gap): querydp5 = %d, querydp3 = %d, genomedp5 = %u\n",
- cdna_direction,querydp5_medialgap,querydp3,genomedp5_medialgap));
+ debug(printf("Stage 3 (dir %d): distalmedial_ending3: Dynamic programming at 3' end (medial to gap): querydp5 = %d, querydp3 = %d, genomedp5 = %u\n",
+ cdna_direction,querydp5_medialgap,querydp3,genomedp5_medialgap));
- if (genomedp5_medialgap < 0) {
- debug(printf("Not feasible to do medial gap\n"));
- *ambig_end_length = 0;
- *ambig_prob = 0.0;
-
- *path = Pairpool_transfer(*path,endgappairs);
- *chop_exon_p = false;
- /* Let previous value of knownsplicep stand */
-
- return peeled_path;
-
- } else {
- debug(printf("Before solving the 3' end, here is the path:\n"));
- debug(Pair_dump_list(*path,true));
- debug(printf("\n"));
-
- if (0 && splicesites != NULL) {
- continuous_gappairs_medialgap = Dynprog_end3_known(&knownsplice_medial_p,&(*dynprogindex_minor),&(*finalscore),
- &(*ambig_end_length),&(*ambig_splicetype),
- &nmatches,&nmismatches,&nopens,&nindels,dynprog,
- &(queryseq_ptr[querydp5_medialgap]),&(queryuc_ptr[querydp5_medialgap]),
- queryjump,genomejump,querydp5_medialgap,genomedp5_medialgap,
- querylength,chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
- cdna_direction,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate);
- if (*ambig_end_length > 0) {
- *ambig_prob = 2.0;
- }
- } else {
- continuous_gappairs_medialgap = Dynprog_end3_gap(&(*dynprogindex_minor),&(*finalscore),
- &nmatches,&nmismatches,&nopens,&nindels,dynprog,
- &(queryseq_ptr[querydp5_medialgap]),&(queryuc_ptr[querydp5_medialgap]),
- queryjump,genomejump,querydp5_medialgap,genomedp5_medialgap,
- chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
- extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS);
- *ambig_end_length = 0;
- *ambig_prob = 0.0;
- }
-
- continuous_goodness_medialgap = nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
- debug(printf("Continuous_goodness_medialgap %d = %d + %d*%d + %d*%d + %d*%d\n",
- continuous_goodness_medialgap,nmatches,MISMATCH,nmismatches,QOPEN,nopens,QINDEL,nindels));
+ debug(printf("Before solving the 3' end, here is the path:\n"));
+ debug(Pair_dump_list(*path,true));
+ debug(printf("\n"));
- if (continuous_goodness_distalgap > continuous_goodness_medialgap) {
- debug(printf("Continuous distal wins: %d > %d\n",continuous_goodness_distalgap,continuous_goodness_medialgap));
- *ambig_end_length = 0;
- *ambig_prob = 0.0;
+ continuous_gappairs_medialgap = Dynprog_end3_gap(&(*dynprogindex_minor),&(*finalscore),
+ &nmatches,&nmismatches,&nopens,&nindels,dynprog,
+ &(queryseq_ptr[querydp5_medialgap]),&(queryuc_ptr[querydp5_medialgap]),
+ queryjump,genomejump,querydp5_medialgap,genomedp5_medialgap,
+ chroffset,chrhigh,cdna_direction,watsonp,jump_late_p,pairpool,
+ extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS);
+ *ambig_end_length = 0;
+ *ambig_prob = 0.0;
+
+ continuous_goodness_medialgap = nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
+ debug(printf("Continuous_goodness_medialgap %d = %d + %d*%d + %d*%d + %d*%d\n",
+ continuous_goodness_medialgap,nmatches,MISMATCH,nmismatches,QOPEN,nopens,QINDEL,nindels));
+
+ if (continuous_goodness_distalgap > continuous_goodness_medialgap) {
+ debug(printf("Continuous distal wins: %d > %d\n",continuous_goodness_distalgap,continuous_goodness_medialgap));
+ *ambig_end_length = 0;
+ *ambig_prob = 0.0;
-#if 0
- debug(printf("Before transferring endgappairs, here is path:\n"));
- debug(Pair_dump_list(*path,true));
- debug(printf("\n"));
-#endif
+ /* *path = Pairpool_transfer(*path,endgappairs); */
+ *chop_exon_p = false;
+ /* Let previous value of knownsplicep stand */
+ debug(printf("Returning peeled path:\n"));
+ debug(Pair_dump_list(peeled_path,true));
+ debug(printf("\n"));
+ return peeled_path;
- *path = Pairpool_transfer(*path,endgappairs);
- *chop_exon_p = false;
- /* Let previous value of knownsplicep stand */
-#if 0
- changepoint = Pair_fracidentity_changepoint(peeled_path,cdna_direction);
- return List_truncate(peeled_path,changepoint);
-#else
- debug(printf("Returning peeled path:\n"));
- debug(Pair_dump_list(peeled_path,true));
- debug(printf("\n"));
- return peeled_path;
-#endif
- } else {
- debug(printf("Continuous medial wins: %d > %d\n",continuous_goodness_medialgap,continuous_goodness_distalgap));
- *chop_exon_p = true;
- *knownsplicep = knownsplice_medial_p;
- return List_reverse(continuous_gappairs_medialgap);
- }
- }
+ } else {
+ debug(printf("Continuous medial wins: %d > %d\n",continuous_goodness_medialgap,continuous_goodness_distalgap));
+ *chop_exon_p = true;
+ *knownsplicep = knownsplice_medial_p;
+ return List_reverse(continuous_gappairs_medialgap);
}
-
}
@@ -8321,15 +8262,16 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p,
- Pairpool_T pairpool, Dynprog_T dynprog, int maxpeelback, int extramaterial_end,
- int extraband_end, double defect_rate, Endalign_T endalign) {
+ Pairpool_T pairpool, Dynprog_T dynprog, int maxpeelback,
+ double defect_rate, Endalign_T endalign) {
List_T continuous_gappairs_distalgap = NULL, peeled_path;
int queryjump, genomejump;
- int querydp5_distalgap, genomedp5_distalgap, querydp3;
+ int querydp5_distalgap, querydp3;
+ Chrpos_T genomedp5_distalgap;
int nmatches, nmismatches, nopens, nindels;
- bool mismatchp = false, protectedp = false;
+ bool protectedp = false;
int n_peeled_indels = 0;
- Pair_T gappair, firstpair;
+ Pair_T firstpair;
debug(printf("\nEXTEND_ENDING3 with endalign %s and maxpeelback %d\n",
Dynprog_endalign_string(endalign),maxpeelback));
@@ -8432,13 +8374,13 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
char *queryseq_ptr, char *queryuc_ptr, int querylength,
bool watsonp, int genestrand, Pairpool_T pairpool, int maxpeelback,
Oligoindex_array_T oligoindices_minor,
- Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen_bound) {
+ Diagpool_T diagpool, Cellpool_T cellpool) {
List_T gappairs, peeled_pairs = NULL, peeled_path = NULL;
- int querydp5, genomedp5, querydp3, genomedp3, source, indexsize;
- bool mismatchp, protectedp;
+ int querydp5, querydp3, source, indexsize;
+ Chrpos_T genomedp5, genomedp3;
+ bool protectedp;
int n_peeled_indels;
- Pair_T gappair, firstpair, lastpair;
+ Pair_T firstpair, lastpair;
Chrpos_T chrstart, chrend;
debug14(printf("\nTRAVERSE_DUAL_BREAK\n"));
@@ -8507,10 +8449,10 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
chrstart,chrend,chroffset,chrhigh,/*plusp*/watsonp,genestrand,
oligoindices_minor,/*proceed_pctcoverage*/0.80,
- pairpool,diagpool,cellpool,sufflookback,nsufflookback,maxintronlen_bound,
+ pairpool,diagpool,cellpool,
/*localp should be false*/true,/*skip_repetitive_p*/false,
/*use_shifted_canonical_p*/true,/*favor_right_p*/false,
- /*debug_graphic_p*/false,/*diagnosticp*/false);
+ /*debug_graphic_p*/false);
debug14(printf("Internal stage2 result:\n"));
debug14(Pair_dump_list(gappairs,true));
@@ -8562,11 +8504,10 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
int maxpeelback, Oligoindex_array_T oligoindices_minor,
- Diagpool_T diagpool, Cellpool_T cellpool, int sufflookback, int nsufflookback, int maxintronlen_bound,
- int extramaterial_paired, int extraband_paired, int extraband_single,
- double defect_rate, int close_indels_mode, bool finalp, bool simplep) {
+ Diagpool_T diagpool, Cellpool_T cellpool,
+ double defect_rate, bool finalp, bool simplep) {
- List_T pairs = NULL, pairptr;
+ List_T pairs = NULL;
Pair_T pair, leftpair, rightpair;
bool filledp, shiftp;
@@ -8586,7 +8527,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
#endif
queryseq_ptr,queryuc_ptr,querylength,watsonp,genestrand,
pairpool,maxpeelback,oligoindices_minor,
- diagpool,cellpool,sufflookback,nsufflookback,maxintronlen_bound);
+ diagpool,cellpool);
}
#endif
@@ -8607,7 +8548,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
path = Pairpool_pop(path,&pair);
} else {
- pairptr = path; /* save */
+ /* pairptr = path; -- save */
path = Pairpool_pop(path,&pair);
leftpair = path->first;
@@ -8624,8 +8565,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
chroffset,chrhigh,
queryseq_ptr,queryuc_ptr,querylength,watsonp,
jump_late_p,pairpool,dynprogM,last_genomedp5,last_genomedp3,
- maxpeelback,extraband_single,defect_rate,
- close_indels_mode,/*forcep*/true,/*finalp*/false);
+ maxpeelback,defect_rate,/*forcep*/true,/*finalp*/false);
} else if (pair->genomejump - pair->queryjump < SINGLESLEN &&
pair->queryjump - pair->genomejump < SINGLESLEN) {
@@ -8634,8 +8574,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
chroffset,chrhigh,
queryseq_ptr,queryuc_ptr,querylength,watsonp,
jump_late_p,pairpool,dynprogM,last_genomedp5,last_genomedp3,
- maxpeelback,extraband_single,defect_rate,
- close_indels_mode,/*forcep*/true,/*finalp*/false);
+ maxpeelback,defect_rate,/*forcep*/true,/*finalp*/false);
} else if (pair->queryjump < MIN_STAGE2_FOR_DUALBREAK) {
debug(printf(" Can be solved as a genome gap\n"));
@@ -8644,9 +8583,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
queryseq_ptr,queryuc_ptr,querylength,
cdna_direction,watsonp,jump_late_p,
pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,
- maxpeelback,extramaterial_paired,
- extraband_paired,extraband_single,defect_rate,close_indels_mode,
- /*finalp*/false,simplep);
+ maxpeelback,defect_rate,/*finalp*/false,simplep);
} else {
debug(printf(" Solving as a dual break\n"));
@@ -8657,7 +8594,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
#endif
queryseq_ptr,queryuc_ptr,querylength,watsonp,genestrand,
pairpool,maxpeelback,oligoindices_minor,
- diagpool,cellpool,sufflookback,nsufflookback,maxintronlen_bound);
+ diagpool,cellpool);
}
}
}
@@ -8675,7 +8612,7 @@ build_dual_breaks (bool *dual_break_p, int *dynprogindex_minor, int *dynproginde
#endif
queryseq_ptr,queryuc_ptr,querylength,watsonp,genestrand,
pairpool,maxpeelback,oligoindices_minor,
- diagpool,cellpool,sufflookback,nsufflookback,maxintronlen_bound);
+ diagpool,cellpool);
}
#endif
@@ -8702,8 +8639,6 @@ build_path_end3 (bool *knownsplicep, int *ambig_end_length_3, Splicetype_T *ambi
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p, int maxpeelback,
- int maxpeelback_distalmedial, int nullgap,
- int extramaterial_end, int extraband_end,
double defect_rate, Pairpool_T pairpool, Dynprog_T dynprogL,
bool extendp, Endalign_T endalign) {
List_T gappairs;
@@ -8770,18 +8705,17 @@ build_path_end3 (bool *knownsplicep, int *ambig_end_length_3, Splicetype_T *ambi
chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,pairpool,dynprogL,maxpeelback,
- extramaterial_end,extraband_end,defect_rate,endalign);
+ defect_rate,endalign);
} else {
/* Looks like we aren't calling this anymore */
abort();
debug(printf("Running distalmedial_ending3\n"));
gappairs = distalmedial_ending3(&(*knownsplicep),&(*chop_exon_p),&(*dynprogindex_minor),
- &finalscore,&(*ambig_end_length_3),&(*ambig_splicetype_3),&(*ambig_prob_3),
- &path,leftpair,rightquerypos,querylength,
- chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
+ &finalscore,&(*ambig_end_length_3),&(*ambig_prob_3),
+ &path,leftpair,rightquerypos,chroffset,chrhigh,
queryseq_ptr,queryuc_ptr,
- cdna_direction,watsonp,jump_late_p,pairpool,dynprogL,maxpeelback_distalmedial,
- extramaterial_end,extraband_end,defect_rate);
+ cdna_direction,watsonp,jump_late_p,pairpool,dynprogL,
+ maxpeelback,defect_rate);
}
debug(printf("Gappairs from build_path_end3:\n"));
@@ -8814,8 +8748,6 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
char *queryseq_ptr, char *queryuc_ptr,
int cdna_direction, bool watsonp, bool jump_late_p, int maxpeelback,
- int maxpeelback_distalmedial, int nullgap,
- int extramaterial_end, int extraband_end,
double defect_rate, Pairpool_T pairpool, Dynprog_T dynprogR,
bool extendp, Endalign_T endalign) {
List_T gappairs;
@@ -8874,22 +8806,21 @@ build_pairs_end5 (bool *knownsplicep, int *ambig_end_length_5, Splicetype_T *amb
*chop_exon_p = false;
gappairs = extend_ending5(&(*knownsplicep),&(*dynprogindex_minor),
&finalscore,&(*ambig_end_length_5),&(*ambig_splicetype_5),&(*ambig_prob_5),
- &pairs,leftquerypos,/*leftgenomepos*/-1,rightpair,
+ &pairs,leftquerypos,rightpair,
chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
- cdna_direction,watsonp,jump_late_p,pairpool,dynprogR,maxpeelback,
- extramaterial_end,extraband_end,defect_rate,endalign);
+ cdna_direction,watsonp,jump_late_p,pairpool,dynprogR,
+ maxpeelback,defect_rate,endalign);
} else {
/* Looks like we aren't calling this anymore */
abort();
debug(printf("Running distalmedial_ending5\n"));
gappairs = distalmedial_ending5(&(*knownsplicep),&(*chop_exon_p),&(*dynprogindex_minor),
- &finalscore,&(*ambig_end_length_5),&(*ambig_splicetype_5),&(*ambig_prob_5),
- &pairs,leftquerypos,/*leftgenomepos*/-1,rightpair,
- chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
+ &finalscore,&(*ambig_end_length_5),&(*ambig_prob_5),
+ &pairs,leftquerypos,rightpair,chroffset,chrhigh,
queryseq_ptr,queryuc_ptr,
- cdna_direction,watsonp,jump_late_p,pairpool,dynprogR,maxpeelback_distalmedial,
- extramaterial_end,extraband_end,defect_rate);
+ cdna_direction,watsonp,jump_late_p,pairpool,dynprogR,
+ maxpeelback,defect_rate);
}
debug(printf("Gappairs from build_pairs_end5:\n"));
@@ -8910,8 +8841,7 @@ static List_T
build_pairs_singles (int *dynprogindex, List_T path,
Univcoord_T chroffset, Univcoord_T chrhigh,
char *queryseq_ptr, char *queryuc_ptr, int querylength,
- bool watsonp, bool jump_late_p, int maxpeelback, int nullgap,
- int extraband_single, double defect_rate, int close_indels_mode,
+ bool watsonp, bool jump_late_p, int maxpeelback, double defect_rate,
Pairpool_T pairpool, Dynprog_T dynprogM,
Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3, bool forcep, bool finalp) {
List_T pairs = NULL, pairptr;
@@ -8978,8 +8908,7 @@ build_pairs_singles (int *dynprogindex, List_T path,
pairs = traverse_single_gap(&filledp,&(*dynprogindex),pairs,&path,leftpair,rightpair,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,watsonp,
jump_late_p,pairpool,dynprogM,last_genomedp5,last_genomedp3,
- maxpeelback,extraband_single,defect_rate,
- close_indels_mode,forcep,finalp);
+ maxpeelback,defect_rate,forcep,finalp);
/* (old comment:) forcep needs to be true here to avoid subsequent anomalies in building dualintrons, e.g., XM_376610.2_mRNA on 7:127885572..127888991 */
if (filledp == true) {
/* Discard the gap */
@@ -9015,11 +8944,9 @@ static List_T
build_pairs_dualintrons (int *dynprogindex, List_T path,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
char *queryseq_ptr, char *queryuc_ptr, int querylength,
- int cdna_direction, bool watsonp,
- bool jump_late_p, int maxpeelback, int nullgap,
- int extramaterial_paired, int extraband_paired, double defect_rate,
+ int cdna_direction, bool watsonp, bool jump_late_p, int maxpeelback, double defect_rate,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogR,
- Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3, bool simplep) {
+ Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3) {
List_T pairs = NULL, midexon_pairs = NULL, pairptr;
Pair_T pair, leftpair, midleftpair, midpair, midrightpair, rightpair;
int midquerypos, midgenomepos;
@@ -9150,8 +9077,7 @@ build_pairs_dualintrons (int *dynprogindex, List_T path,
chrnum,chroffset,chrhigh,midquerypos,midgenomepos,
queryseq_ptr,queryuc_ptr,querylength,cdna_direction,watsonp,
jump_late_p,pairpool,dynprogL,dynprogR,last_genomedp5,last_genomedp3,
- maxpeelback,nullgap,extramaterial_paired,extraband_paired,
- defect_rate,simplep,/*finalp*/false);
+ maxpeelback,defect_rate,/*finalp*/false);
}
}
}
@@ -9177,11 +9103,10 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
#endif
char *queryseq_ptr, char *queryuc_ptr, int querylength,
int cdna_direction, bool watsonp, int genestrand, bool jump_late_p,
- int maxpeelback, int nullgap, int extramaterial_paired,
- int extraband_single, int extraband_paired, double defect_rate, int close_indels_mode,
+ int maxpeelback, double defect_rate,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Oligoindex_array_T oligoindices_minor,
- Diagpool_T diagpool, Cellpool_T cellpool, int sufflookback, int nsufflookback, int maxintronlen_bound,
+ Diagpool_T diagpool, Cellpool_T cellpool,
Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3, bool finalp, bool simplep) {
List_T pairs = NULL, pairptr;
Pair_T pair, leftpair, rightpair;
@@ -9228,9 +9153,7 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
pairs,&path,leftpair,rightpair,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,cdna_direction,watsonp,
jump_late_p,pairpool,dynprogL,dynprogM,dynprogR,
- last_genomedp5,last_genomedp3,maxpeelback,
- extramaterial_paired,extraband_paired,extraband_single,
- defect_rate,close_indels_mode,/*finalp*/true);
+ last_genomedp5,last_genomedp3,maxpeelback,defect_rate,/*finalp*/true);
if (filledp == true) {
/* Discard gap */
@@ -9261,7 +9184,7 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
#endif
queryseq_ptr,queryuc_ptr,querylength,watsonp,genestrand,
pairpool,maxpeelback,oligoindices_minor,
- diagpool,cellpool,sufflookback,nsufflookback,maxintronlen_bound);
+ diagpool,cellpool);
}
} else if (finalp == false && pair->queryjump > pair->genomejump + EXTRAQUERYGAP) {
@@ -9277,9 +9200,7 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
pairs,&path,leftpair,rightpair,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,cdna_direction,watsonp,
jump_late_p,pairpool,dynprogL,dynprogM,dynprogR,
- last_genomedp5,last_genomedp3,maxpeelback,
- extramaterial_paired,extraband_paired,extraband_single,
- defect_rate,close_indels_mode,/*finalp*/true);
+ last_genomedp5,last_genomedp3,maxpeelback,defect_rate,/*finalp*/true);
if (filledp == true) {
/* Discard gap */
@@ -9314,8 +9235,7 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
queryseq_ptr,queryuc_ptr,querylength,
cdna_direction,watsonp,jump_late_p,
pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,
- maxpeelback,extramaterial_paired,extraband_paired,extraband_single,
- defect_rate,close_indels_mode,finalp,simplep);
+ maxpeelback,defect_rate,finalp,simplep);
/* Previously had forcep == true, because previously thought that adding large gap is not a good solution */
if (filledp == true) {
@@ -9360,8 +9280,7 @@ build_pairs_introns (bool *shiftp, bool *incompletep,
chroffset,chrhigh,
queryseq_ptr,queryuc_ptr,querylength,watsonp,
jump_late_p,pairpool,dynprogM,last_genomedp5,last_genomedp3,
- maxpeelback,extraband_single,defect_rate,
- close_indels_mode,/*forcep*/false,finalp);
+ maxpeelback,defect_rate,/*forcep*/false,finalp);
if (filledp == true) {
/* Discard the gap */
@@ -9425,12 +9344,12 @@ score_alignment (int *nmatches, int *nmismatches, int *nindels,
static List_T
score_introns (double *max_intron_score, double *avg_donor_score, double *avg_acceptor_score,
int *ncanonical, int *nbadintrons, List_T path, int cdna_direction, bool watsonp,
- Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh
#ifdef WASTE
- Pairpool_T pairpool,
+ , Pairpool_T pairpool
#endif
- int nullgap) {
- List_T pairs = NULL, pairptr, p, q;
+ ) {
+ List_T pairs = NULL, pairptr, p;
Pair_T pair, leftpair, rightpair;
Univcoord_T splicesitepos;
int minintronlen;
@@ -9728,7 +9647,7 @@ score_introns (double *max_intron_score, double *avg_donor_score, double *avg_ac
static int
end_compare (List_T x, List_T y, int cdna_direction, bool watsonp,
- Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, int nullgap,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
bool pairsp) {
List_T pairs1, pairs2, path1, path2;
double max_intron_score;
@@ -9751,7 +9670,7 @@ end_compare (List_T x, List_T y, int cdna_direction, bool watsonp,
path1 = List_reverse(pairs1);
debug11(printf("Calling score_introns for end_compare on path1\n"));
pairs1 = score_introns(&max_intron_score,&avg_donor_score_1,&avg_acceptor_score_1,&ncanonical_1,
- &nbadintrons_1,path1,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
+ &nbadintrons_1,path1,cdna_direction,watsonp,chrnum,chroffset,chrhigh);
alignment_score_1 = score_alignment(&nmatches_1,&nmismatches_1,&nindels_1,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_1,
@@ -9761,7 +9680,7 @@ end_compare (List_T x, List_T y, int cdna_direction, bool watsonp,
path2 = List_reverse(pairs2);
debug11(printf("Calling score_introns for end_compare on path2\n"));
pairs2 = score_introns(&max_intron_score,&avg_donor_score_2,&avg_acceptor_score_2,&ncanonical_2,
- &nbadintrons_2,path2,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
+ &nbadintrons_2,path2,cdna_direction,watsonp,chrnum,chroffset,chrhigh);
alignment_score_2 = score_alignment(&nmatches_2,&nmismatches_2,&nindels_2,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_2,
@@ -9774,7 +9693,7 @@ end_compare (List_T x, List_T y, int cdna_direction, bool watsonp,
debug11(printf("Calling score_introns for end_compare on path1\n"));
pairs1 = score_introns(&max_intron_score,&avg_donor_score_1,&avg_acceptor_score_1,&ncanonical_1,
- &nbadintrons_1,path1,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
+ &nbadintrons_1,path1,cdna_direction,watsonp,chrnum,chroffset,chrhigh);
alignment_score_1 = score_alignment(&nmatches_1,&nmismatches_1,&nindels_1,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_1,
@@ -9784,7 +9703,7 @@ end_compare (List_T x, List_T y, int cdna_direction, bool watsonp,
path1 = List_reverse(pairs1);
debug11(printf("Calling score_introns for end_compare on path2\n"));
pairs2 = score_introns(&max_intron_score,&avg_donor_score_2,&avg_acceptor_score_2,&ncanonical_2,
- &nbadintrons_2,path2,cdna_direction,watsonp,chrnum,chroffset,chrhigh,nullgap);
+ &nbadintrons_2,path2,cdna_direction,watsonp,chrnum,chroffset,chrhigh);
alignment_score_2 = score_alignment(&nmatches_2,&nmismatches_2,&nindels_2,
#ifdef COMPLEX_DIRECTION
&indel_alignment_score_2,
@@ -9856,6 +9775,7 @@ end_compare (List_T x, List_T y, int cdna_direction, bool watsonp,
}
+#if 0
static List_T
filter_goodness_hmm (bool *filterp, List_T pairs, double defect_rate) {
Pair_T pair;
@@ -9962,8 +9882,10 @@ filter_goodness_hmm (bool *filterp, List_T pairs, double defect_rate) {
return pairs;
}
+#endif
+#if 0
static List_T
filter_indels_hmm (bool *filterp, List_T pairs) {
Pair_T pair;
@@ -10071,6 +9993,7 @@ filter_indels_hmm (bool *filterp, List_T pairs) {
return pairs;
}
+#endif
@@ -10681,27 +10604,23 @@ path_compute_dir (double *defect_rate, List_T pairs,
#endif
char *queryseq_ptr, char *queryuc_ptr, int querylength,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
- int maxpeelback, int nullgap,
- int extramaterial_paired, int extraband_single, int extraband_paired,
+ int maxpeelback,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
Oligoindex_array_T oligoindices_minor,
- Diagpool_T diagpool, Cellpool_T cellpool, int sufflookback, int nsufflookback, int maxintronlen_bound,
- int close_indels_mode, int paired_favor_mode, int zero_offset) {
+ Diagpool_T diagpool, Cellpool_T cellpool) {
List_T path = NULL;
int dynprogindex_minor = DYNPROGINDEX_MINOR, dynprogindex_major = DYNPROGINDEX_MAJOR;
int iter0, iter1, iter2;
bool shiftp, incompletep;
- bool smoothp, shortp, badp, deletep, filterp, dual_break_p;
+ bool shortp, badp, deletep, dual_break_p;
int matches, unknowns, mismatches, qopens, qindels, topens, tindels,
ncanonical, nsemicanonical, nnoncanonical;
double min_splice_prob;
-#ifdef DEBUG
- Pair_T firstpair, lastpair;
-#endif
+
iter0 = 0;
- filterp = dual_break_p = true;
+ dual_break_p = true;
while ((/* filterp == true || */ dual_break_p == true) && iter0 < MAXITER_CYCLES) {
path = List_reverse(pairs);
@@ -10718,8 +10637,7 @@ path_compute_dir (double *defect_rate, List_T pairs,
cdna_direction,iter0));
pairs = build_pairs_singles(&dynprogindex_minor,path,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,watsonp,
- jump_late_p,maxpeelback,nullgap,extraband_single,
- /*defect_rate*/0.0,close_indels_mode,pairpool,dynprogM,
+ jump_late_p,maxpeelback,/*defect_rate*/0.0,pairpool,dynprogM,
last_genomedp5,last_genomedp3,/*forcep*/false,/*finalp*/false);
#ifdef DEBUG8
if (stage3debug == POST_SINGLES) {
@@ -10750,8 +10668,7 @@ path_compute_dir (double *defect_rate, List_T pairs,
cdna_direction,iter0));
pairs = build_pairs_singles(&dynprogindex_minor,path,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,watsonp,
- jump_late_p,maxpeelback,nullgap,extraband_single,
- /*defect_rate*/0.0,close_indels_mode,pairpool,dynprogM,
+ jump_late_p,maxpeelback,/*defect_rate*/0.0,pairpool,dynprogM,
last_genomedp5,last_genomedp3,/*forcep*/false,/*finalp*/false);
/* <<pairs */
}
@@ -10791,10 +10708,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
#endif
queryseq_ptr,queryuc_ptr,querylength,
cdna_direction,watsonp,genestrand,jump_late_p,
- maxpeelback,nullgap,extramaterial_paired,extraband_single,extraband_paired,
- *defect_rate,close_indels_mode,pairpool,dynprogL,dynprogM,dynprogR,
+ maxpeelback,*defect_rate,pairpool,dynprogL,dynprogM,dynprogR,
oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,
last_genomedp5,last_genomedp3,/*finalp*/false,/*simplep*/true);
debug(printf(" => Result of Pass 3c (introns):\n"));
debug(Pair_dump_list(pairs,/*zerobasedp*/true));
@@ -10827,8 +10742,7 @@ path_compute_dir (double *defect_rate, List_T pairs,
/* Smoothing by probability */
path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
- pairs = assign_intron_probs(path,cdna_direction,watsonp,queryseq_ptr,
- chrnum,chroffset,chrhigh,pairpool);
+ pairs = assign_intron_probs(path,cdna_direction,watsonp,chrnum,chroffset,chrhigh,pairpool);
Smooth_reset(pairs);
pairs = Smooth_pairs_by_intronprobs(&badp,pairs,pairpool);
@@ -10869,9 +10783,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
/* Pass 3b: dual introns. pairs --> pairs */
pairs = build_pairs_dualintrons(&dynprogindex_major,path,chrnum,chroffset,chrhigh,
queryseq_ptr,queryuc_ptr,querylength,cdna_direction,watsonp,jump_late_p,
- maxpeelback,nullgap,extramaterial_paired,extraband_paired,
- *defect_rate,pairpool,dynprogL,dynprogR,
- last_genomedp5,last_genomedp3,/*simplep*/true);
+ maxpeelback,*defect_rate,pairpool,dynprogL,dynprogR,
+ last_genomedp5,last_genomedp3);
debug(printf(" => Result of Pass 3b (dual introns):\n"));
debug(Pair_dump_list(pairs,/*zerobasedp*/true));
}
@@ -10885,7 +10798,7 @@ path_compute_dir (double *defect_rate, List_T pairs,
iter1++;
debug(printf("At end of inner loop: iter1 %d, shortp %d, deletep %d, badp %d\n",
- iter1,shortp,deletep,badp,dual_break_p));
+ iter1,shortp,deletep,badp));
}
#ifdef DEBUG8
@@ -10926,9 +10839,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
queryseq_ptr,queryuc_ptr,querylength,
cdna_direction,watsonp,genestrand,jump_late_p,pairpool,
dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,maxpeelback,
- oligoindices_minor,diagpool,cellpool,sufflookback,nsufflookback,
- maxintronlen_bound,extramaterial_paired,extraband_paired,extraband_single,
- *defect_rate,close_indels_mode,/*finalp*/false,/*simplep*/true);
+ oligoindices_minor,diagpool,cellpool,
+ *defect_rate,/*finalp*/false,/*simplep*/true);
/* Must end with path to start loop */
path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
pairs = List_reverse(path);
@@ -10950,7 +10862,7 @@ path_compute_dir (double *defect_rate, List_T pairs,
/* filterp = false; */
#endif
iter0++;
- debug(printf("At end of outer loop: filterp %d, dual_break_p %d\n",filterp,dual_break_p));
+ debug(printf("At end of outer loop: dual_break_p %d\n",dual_break_p));
}
path = List_reverse(pairs);
@@ -10961,30 +10873,18 @@ path_compute_dir (double *defect_rate, List_T pairs,
static List_T
path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, double *ambig_prob_5,
double defect_rate, List_T pairs, int cdna_direction,
- bool watsonp, int genestrand, bool jump_late_p, int querylength,
- char *queryseq_ptr, char *queryuc_ptr,
+ bool watsonp, bool jump_late_p, char *queryseq_ptr, char *queryuc_ptr,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
- int maxpeelback, int maxpeelback_distalmedial, int nullgap,
- int extramaterial_end, int extraband_end,
- int extramaterial_paired, int extraband_single, int extraband_paired,
- Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- bool do_final_p, Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool,
- int sufflookback, int nsufflookback,
- int maxintronlen_bound, int close_indels_mode, int paired_favor_mode, int zero_offset) {
+ int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogR) {
List_T path = NULL;
- int iter1, iter2;
- int dynprogindex_minor = DYNPROGINDEX_MINOR, dynprogindex_major = DYNPROGINDEX_MAJOR;
+ int iter1;
+ int dynprogindex_minor = DYNPROGINDEX_MINOR;
int nmatches, nunknowns, nmismatches, qopens, qindels, topens, tindels,
ncanonical, nsemicanonical, nnoncanonical;
double min_splice_prob;
- bool filterp = true, dual_break_p = true;
- int distance5, distance3, totaljump5, totaljump3, npairs5, npairs3, donep;
- bool knownsplice5p, knownsplice3p, chop_exon_p;
-#ifdef DEBUG
- Pair_T firstpair, lastpair;
-#endif
- bool trim5p, trim3p, adjacent_indels_p;
+ bool knownsplice5p, chop_exon_p;
+ bool trim5p;
*ambig_end_length_5 = 0;
*ambig_prob_5 = 0.0;
@@ -11027,9 +10927,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogR,
+ maxpeelback,defect_rate,pairpool,dynprogR,
/*extendp*/true,/*endalign*/QUERYEND_GAP);
@@ -11061,8 +10959,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
/* Using iter1 to avoid the possibility of an infinite loop */
iter1 = 0;
while (iter1 < 5 && trim5p == true) {
- pairs = trim_end5_exon_indels(&trim5p,*ambig_end_length_5,pairs,paired_favor_mode,zero_offset,querylength,
- watsonp,cdna_direction,maxintronlen_bound);
+ pairs = trim_end5_exon_indels(&trim5p,*ambig_end_length_5,pairs,cdna_direction);
if (trim5p == true) {
pairs = build_pairs_end5(&knownsplice5p,&(*ambig_end_length_5),&(*ambig_splicetype_5),&(*ambig_prob_5),
&chop_exon_p,&dynprogindex_minor,pairs,
@@ -11070,9 +10967,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogR,/*extendp*/true,
+ maxpeelback,defect_rate,pairpool,dynprogR,/*extendp*/true,
/*endalign*/BEST_LOCAL);
debug3(printf("AFTER 5' REBUILD\n"));
debug3(Pair_dump_list(pairs,true));
@@ -11092,9 +10987,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogR,/*extendp*/true,
+ maxpeelback,defect_rate,pairpool,dynprogR,/*extendp*/true,
/*endalign*/QUERYEND_INDELS);
#endif
}
@@ -11108,9 +11001,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogR,
+ maxpeelback,defect_rate,pairpool,dynprogR,
/*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
debug(Pair_dump_list(pairs,true));
@@ -11123,30 +11014,19 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
static List_T
path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, double *ambig_prob_3,
double defect_rate, List_T path, int cdna_direction,
- bool watsonp, int genestrand, bool jump_late_p, int querylength,
+ bool watsonp, bool jump_late_p, int querylength,
char *queryseq_ptr, char *queryuc_ptr,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
- int maxpeelback, int maxpeelback_distalmedial, int nullgap,
- int extramaterial_end, int extraband_end,
- int extramaterial_paired, int extraband_single, int extraband_paired,
- Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- bool do_final_p, Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool,
- int sufflookback, int nsufflookback,
- int maxintronlen_bound, int close_indels_mode, int paired_favor_mode, int zero_offset) {
+ int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL) {
List_T pairs = NULL;
- int iter1, iter2;
- int dynprogindex_minor = DYNPROGINDEX_MINOR, dynprogindex_major = DYNPROGINDEX_MAJOR;
+ int iter1;
+ int dynprogindex_minor = DYNPROGINDEX_MINOR;
int nmatches, nunknowns, nmismatches, qopens, qindels, topens, tindels,
ncanonical, nsemicanonical, nnoncanonical;
double min_splice_prob;
- bool filterp = true, dual_break_p = true;
- int distance5, distance3, totaljump5, totaljump3, npairs5, npairs3, donep;
- bool knownsplice5p, knownsplice3p, chop_exon_p;
-#ifdef DEBUG
- Pair_T firstpair, lastpair;
-#endif
- bool trim5p, trim3p, adjacent_indels_p;
+ bool knownsplice3p, chop_exon_p;
+ bool trim3p;
*ambig_end_length_3 = 0;
*ambig_prob_3 = 0.0;
@@ -11189,9 +11069,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogL,
+ maxpeelback,defect_rate,pairpool,dynprogL,
/*extendp*/true,/*endalign*/QUERYEND_GAP);
/* Necessary to insert gaps and assign gap types (fills in cDNA
@@ -11222,8 +11100,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
/* Using iter1 to avoid the possibility of an infinite loop */
iter1 = 0;
while (iter1 < 5 && trim3p == true) {
- path = trim_end3_exon_indels(&trim3p,*ambig_end_length_3,path,paired_favor_mode,zero_offset,querylength,
- watsonp,cdna_direction,maxintronlen_bound);
+ path = trim_end3_exon_indels(&trim3p,*ambig_end_length_3,path,cdna_direction);
if (trim3p == true) {
path = build_path_end3(&knownsplice3p,&(*ambig_end_length_3),&(*ambig_splicetype_3),&(*ambig_prob_3),
&chop_exon_p,&dynprogindex_minor,path,
@@ -11231,9 +11108,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogL,/*extendp*/true,
+ maxpeelback,defect_rate,pairpool,dynprogL,/*extendp*/true,
/*endalign*/BEST_LOCAL);
debug3(printf("AFTER 3' REBUILD\n"));
debug3(Pair_dump_list(path,true));
@@ -11252,9 +11127,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogL,/*extendp*/true,
+ maxpeelback,defect_rate,pairpool,dynprogL,/*extendp*/true,
/*endalign*/QUERYEND_NOGAPS);
#endif
}
@@ -11268,9 +11141,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogL,
+ maxpeelback,defect_rate,pairpool,dynprogL,
/*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
debug(Pair_dump_list(path,true));
@@ -11287,28 +11158,14 @@ path_compute_final (double defect_rate, List_T pairs, int cdna_direction, bool w
#endif
char *queryseq_ptr, char *queryuc_ptr,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
- Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
- int maxpeelback, int maxpeelback_distalmedial, int nullgap,
- int extramaterial_end, int extraband_end,
- int extramaterial_paired, int extraband_single, int extraband_paired,
- Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3, bool do_final_p,
+ int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ Chrpos_T *last_genomedp5, Chrpos_T *last_genomedp3,
Oligoindex_array_T oligoindices_minor,
- Diagpool_T diagpool, Cellpool_T cellpool, int sufflookback, int nsufflookback,
- int maxintronlen_bound, int close_indels_mode, int paired_favor_mode, int zero_offset) {
+ Diagpool_T diagpool, Cellpool_T cellpool) {
List_T path = NULL;
- int iter1, iter2;
int dynprogindex_minor = DYNPROGINDEX_MINOR, dynprogindex_major = DYNPROGINDEX_MAJOR;
- int matches, unknowns, mismatches, qopens, qindels, topens, tindels,
- ncanonical, nsemicanonical, nnoncanonical;
- double min_splice_prob;
- bool filterp = true, dual_break_p = true;
- bool shiftp, incompletep, shortp, deletep;
- bool chop_exon_p;
-#ifdef DEBUG
- Pair_T firstpair, lastpair;
-#endif
- bool adjacent_indels_p;
+ bool dual_break_p = true;
+ bool shiftp, incompletep;
debug(printf("Entering path_compute_final\n"));
@@ -11316,8 +11173,7 @@ path_compute_final (double defect_rate, List_T pairs, int cdna_direction, bool w
path = List_reverse(pairs);
pairs = build_pairs_singles(&dynprogindex_minor,path,
chroffset,chrhigh,queryseq_ptr,queryuc_ptr,querylength,watsonp,
- jump_late_p,maxpeelback,nullgap,extraband_single,
- defect_rate,close_indels_mode,pairpool,dynprogM,
+ jump_late_p,maxpeelback,defect_rate,pairpool,dynprogM,
last_genomedp5,last_genomedp3,/*forcep*/true,/*finalp*/true);
#if 1
@@ -11332,10 +11188,8 @@ path_compute_final (double defect_rate, List_T pairs, int cdna_direction, bool w
#endif
queryseq_ptr,queryuc_ptr,querylength,
cdna_direction,watsonp,genestrand,jump_late_p,
- maxpeelback,nullgap,extramaterial_paired,extraband_single,extraband_paired,
- defect_rate,close_indels_mode,pairpool,dynprogL,dynprogM,dynprogR,
+ maxpeelback,defect_rate,pairpool,dynprogL,dynprogM,dynprogR,
oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,
last_genomedp5,last_genomedp3,/*finalp*/true,/*simplep*/true);
#endif
@@ -11349,9 +11203,7 @@ path_compute_final (double defect_rate, List_T pairs, int cdna_direction, bool w
cdna_direction,watsonp,genestrand,jump_late_p,pairpool,
dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,maxpeelback,
oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,
- extramaterial_paired,extraband_paired,extraband_single,
- defect_rate,close_indels_mode,/*finalp*/true,/*simplep*/true);
+ defect_rate,/*finalp*/true,/*simplep*/true);
path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
pairs = assign_gap_types(path,cdna_direction,watsonp,queryseq_ptr,
@@ -11377,7 +11229,9 @@ trim_novel_spliceends (List_T pairs,
Pair_T pair;
Univcoord_T genomicpos, start_genomicpos, end_genomicpos, splice_genomepos_5, splice_genomepos_3;
Univcoord_T start, end;
- double donor_prob, acceptor_prob, max_prob_5 = 0.0, max_prob_3 = 0.0;
+ double donor_prob, acceptor_prob, max_prob_5 = 0.0, max_prob_3 = 0.0,
+ max_prob_sense_forward_5 = 0.0, max_prob_sense_anti_5 = 0.0,
+ max_prob_sense_forward_3 = 0.0, max_prob_sense_anti_3 = 0.0;
Splicetype_T splicetype5, splicetype3;
int splice_cdna_direction_5, splice_sensedir_5, splice_cdna_direction_3, splice_sensedir_3;
bool mismatchp;
@@ -11497,19 +11351,25 @@ trim_novel_spliceends (List_T pairs,
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */
debug13(printf("3', watson, sense null %u %f %f\n",genomicpos,donor_prob,acceptor_prob));
- if (donor_prob > max_prob_3) {
- max_prob_3 = donor_prob;
- splice_genomepos_3 = genomicpos - 1;
- splice_cdna_direction_3 = +1;
- splice_sensedir_3 = SENSE_FORWARD;
- splicetype3 = DONOR;
+ if (donor_prob > max_prob_sense_forward_3) {
+ max_prob_sense_forward_3 = donor_prob;
+ if (donor_prob > max_prob_3) {
+ max_prob_3 = donor_prob;
+ splice_genomepos_3 = genomicpos - 1;
+ splice_cdna_direction_3 = +1;
+ splice_sensedir_3 = SENSE_FORWARD;
+ splicetype3 = DONOR;
+ }
}
- if (acceptor_prob > max_prob_3) {
- max_prob_3 = acceptor_prob;
- splice_genomepos_3 = genomicpos - 1;
- splice_cdna_direction_3 = -1;
- splice_sensedir_3 = SENSE_ANTI;
- splicetype3 = ANTIACCEPTOR;
+ if (acceptor_prob > max_prob_sense_anti_3) {
+ max_prob_sense_anti_3 = acceptor_prob;
+ if (acceptor_prob > max_prob_3) {
+ max_prob_3 = acceptor_prob;
+ splice_genomepos_3 = genomicpos - 1;
+ splice_cdna_direction_3 = -1;
+ splice_sensedir_3 = SENSE_ANTI;
+ splicetype3 = ANTIACCEPTOR;
+ }
}
}
@@ -11522,19 +11382,25 @@ trim_novel_spliceends (List_T pairs,
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */
debug13(printf("3', crick, sense null %u %f %f\n",genomicpos,donor_prob,acceptor_prob));
- if (donor_prob > max_prob_3) {
- max_prob_3 = donor_prob;
- splice_genomepos_3 = (chrhigh - chroffset) - genomicpos;
- splice_cdna_direction_3 = +1;
- splice_sensedir_3 = SENSE_FORWARD;
- splicetype3 = ANTIDONOR;
+ if (donor_prob > max_prob_sense_forward_3) {
+ max_prob_sense_forward_3 = donor_prob;
+ if (donor_prob > max_prob_3) {
+ max_prob_3 = donor_prob;
+ splice_genomepos_3 = (chrhigh - chroffset) - genomicpos;
+ splice_cdna_direction_3 = +1;
+ splice_sensedir_3 = SENSE_FORWARD;
+ splicetype3 = ANTIDONOR;
+ }
}
- if (acceptor_prob > max_prob_3) {
- max_prob_3 = acceptor_prob;
- splice_genomepos_3 = (chrhigh - chroffset) - genomicpos;
- splice_cdna_direction_3 = -1;
- splice_sensedir_3 = SENSE_ANTI;
- splicetype3 = ACCEPTOR;
+ if (acceptor_prob > max_prob_sense_anti_3) {
+ max_prob_sense_anti_3 = acceptor_prob;
+ if (acceptor_prob > max_prob_3) {
+ max_prob_3 = acceptor_prob;
+ splice_genomepos_3 = (chrhigh - chroffset) - genomicpos;
+ splice_cdna_direction_3 = -1;
+ splice_sensedir_3 = SENSE_ANTI;
+ splicetype3 = ACCEPTOR;
+ }
}
}
}
@@ -11668,19 +11534,25 @@ trim_novel_spliceends (List_T pairs,
acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */
donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */
debug13(printf("5', watson, sense null %u %f %f\n",genomicpos,donor_prob,acceptor_prob));
- if (acceptor_prob > max_prob_5) {
- max_prob_5 = acceptor_prob;
- splice_genomepos_5 = genomicpos;
- splice_cdna_direction_5 = +1;
- splice_sensedir_5 = SENSE_FORWARD;
- splicetype5 = ACCEPTOR;
+ if (acceptor_prob > max_prob_sense_forward_5) {
+ max_prob_sense_forward_5 = acceptor_prob;
+ if (acceptor_prob > max_prob_5) {
+ max_prob_5 = acceptor_prob;
+ splice_genomepos_5 = genomicpos;
+ splice_cdna_direction_5 = +1;
+ splice_sensedir_5 = SENSE_FORWARD;
+ splicetype5 = ACCEPTOR;
+ }
}
- if (donor_prob > max_prob_5) {
- max_prob_5 = donor_prob;
- splice_genomepos_5 = genomicpos;
- splice_cdna_direction_5 = -1;
- splice_sensedir_5 = SENSE_ANTI;
- splicetype5 = ANTIDONOR;
+ if (donor_prob > max_prob_sense_anti_5) {
+ max_prob_sense_anti_5 = donor_prob;
+ if (donor_prob > max_prob_5) {
+ max_prob_5 = donor_prob;
+ splice_genomepos_5 = genomicpos;
+ splice_cdna_direction_5 = -1;
+ splice_sensedir_5 = SENSE_ANTI;
+ splicetype5 = ANTIDONOR;
+ }
}
}
@@ -11693,19 +11565,25 @@ trim_novel_spliceends (List_T pairs,
acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */
debug13(printf("5', crick, sense null %u %f %f\n",genomicpos,donor_prob,acceptor_prob));
- if (acceptor_prob > max_prob_5) {
- max_prob_5 = acceptor_prob;
- splice_genomepos_5 = (chrhigh - chroffset) - genomicpos + 1;
- splice_cdna_direction_5 = +1;
- splice_sensedir_5 = SENSE_FORWARD;
- splicetype5 = ANTIACCEPTOR;
+ if (acceptor_prob > max_prob_sense_forward_5) {
+ max_prob_sense_forward_5 = acceptor_prob;
+ if (acceptor_prob > max_prob_5) {
+ max_prob_5 = acceptor_prob;
+ splice_genomepos_5 = (chrhigh - chroffset) - genomicpos + 1;
+ splice_cdna_direction_5 = +1;
+ splice_sensedir_5 = SENSE_FORWARD;
+ splicetype5 = ANTIACCEPTOR;
+ }
}
- if (donor_prob > max_prob_5) {
- max_prob_5 = donor_prob;
- splice_genomepos_5 = (chrhigh - chroffset) - genomicpos + 1;
- splice_cdna_direction_5 = -1;
- splice_sensedir_5 = SENSE_ANTI;
- splicetype5 = DONOR;
+ if (donor_prob > max_prob_sense_anti_5) {
+ max_prob_sense_anti_5 = donor_prob;
+ if (donor_prob > max_prob_5) {
+ max_prob_5 = donor_prob;
+ splice_genomepos_5 = (chrhigh - chroffset) - genomicpos + 1;
+ splice_cdna_direction_5 = -1;
+ splice_sensedir_5 = SENSE_ANTI;
+ splicetype5 = DONOR;
+ }
}
}
}
@@ -11731,7 +11609,7 @@ trim_novel_spliceends (List_T pairs,
if (*sensedir == SENSE_NULL) {
if (max_prob_3 > max_prob_5) {
- if (max_prob_3 > END_SPLICESITE_PROB) {
+ if (max_prob_3 >= END_SPLICESITE_PROB) {
debug13(printf("Found good splice %s on 3' end at %u with probability %f\n",
Splicetype_string(splicetype3),splice_genomepos_3,max_prob_3));
path = List_reverse(pairs);
@@ -11744,13 +11622,21 @@ trim_novel_spliceends (List_T pairs,
*ambig_splicetype_3 = splicetype3;
*ambig_prob_3 = max_prob_3;
*cdna_direction = splice_cdna_direction_3;
- *sensedir = splice_sensedir_3;
debug13(printf("Set ambig_end_length_3 to be %d\n",*ambig_end_length_3));
+ if (max_prob_sense_forward_3 >= END_SPLICESITE_PROB && max_prob_sense_anti_3 < END_SPLICESITE_PROB
+ && max_prob_sense_anti_5 < END_SPLICESITE_PROB) {
+ *sensedir = splice_sensedir_3;
+ } else if (max_prob_sense_anti_3 >= END_SPLICESITE_PROB && max_prob_sense_forward_3 < END_SPLICESITE_PROB
+ && max_prob_sense_forward_5 < END_SPLICESITE_PROB) {
+ *sensedir = splice_sensedir_3;
+ } else {
+ /* Not enough evidence to set sensedir */
+ }
}
pairs = List_reverse(path);
}
} else {
- if (max_prob_5 > END_SPLICESITE_PROB) {
+ if (max_prob_5 >= END_SPLICESITE_PROB) {
debug13(printf("Found good splice %s on 5' end at %u with probability %f\n",
Splicetype_string(splicetype5),splice_genomepos_5,max_prob_5));
while (pairs != NULL && ((Pair_T) pairs->first)->genomepos < splice_genomepos_5) {
@@ -11762,8 +11648,16 @@ trim_novel_spliceends (List_T pairs,
*ambig_splicetype_5 = splicetype5;
*ambig_prob_5 = max_prob_5;
*cdna_direction = splice_cdna_direction_5;
- *sensedir = splice_sensedir_5;
debug13(printf("Set ambig_end_length_5 to be %d\n",*ambig_end_length_5));
+ if (max_prob_sense_forward_5 >= END_SPLICESITE_PROB && max_prob_sense_anti_5 < END_SPLICESITE_PROB
+ && max_prob_sense_anti_3 < END_SPLICESITE_PROB) {
+ *sensedir = splice_sensedir_5;
+ } else if (max_prob_sense_anti_5 >= END_SPLICESITE_PROB && max_prob_sense_forward_5 < END_SPLICESITE_PROB
+ && max_prob_sense_forward_3 < END_SPLICESITE_PROB) {
+ *sensedir = splice_sensedir_5;
+ } else {
+ /* Not enough evidence to set sensedir */
+ }
}
}
}
@@ -11779,27 +11673,29 @@ static List_T
path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
Splicetype_T *ambig_splicetype_5, Splicetype_T *ambig_splicetype_3,
double *ambig_prob_5, double *ambig_prob_3,
- List_T pairs, int *cdna_direction, int *sensedir, bool watsonp, bool jump_late_p,
- int querylength, char *queryseq_ptr, char *queryuc_ptr,
- Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ List_T pairs, int *cdna_direction, bool watsonp, bool jump_late_p,
+ int querylength,
+#ifdef GSNAP
+ int *sensedir,
+#endif
+ char *queryseq_ptr, char *queryuc_ptr,
+ Univcoord_T chroffset, Univcoord_T chrhigh,
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
- int maxpeelback, int maxpeelback_distalmedial, int nullgap,
- int extramaterial_end, int extraband_end,
- Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogR,
- int maxintronlen_bound, int paired_favor_mode, int zero_offset) {
+ int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogR) {
List_T path = NULL;
int dynprogindex_minor = DYNPROGINDEX_MINOR;
- int maxpeelback5, maxpeelback3;
bool chop_exon_p;
bool knownsplice5p = false, knownsplice3p = false;
- bool trim5p, trim3p, trim5p_ignore, trim3p_ignore;
- bool shiftp, incompletep;
+ bool trimp, trim5p, trim3p, trim5p_ignore, trim3p_ignore;
int iter = 0;
- maxpeelback5 = maxpeelback3 = maxpeelback;
-
- debug(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,*sensedir));
- debug3(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,*sensedir));
+#ifdef GSNAP
+ debug(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,sensedir));
+ debug3(printf("Entering path_trim with cdna_direction %d and sensedir %d\n",*cdna_direction,sensedir));
+#else
+ debug(printf("Entering path_trim with cdna_direction %d\n",*cdna_direction));
+ debug3(printf("Entering path_trim with cdna_direction %d\n",*cdna_direction));
+#endif
#ifdef GSNAP
if (novelsplicingp == true) {
@@ -11826,13 +11722,14 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
/* Done anyway within loop below */
/* pairs = Pair_trim_ends(&trim5p,&trim3p,pairs); */
- trim5p = trim3p = true;
+ trimp = trim5p = trim3p = true;
debug3(printf("After Pair_trim_ends: trim5p %d, trim3p %d\n",trim5p,trim3p));
debug3(Pair_dump_list(pairs,true));
debug3(printf("\n"));
- while (iter++ < 3 && (trim5p == true || trim3p == true)) {
+ while (iter++ < 3 && trimp == true) {
+ trimp = false;
/* Revised: Using QUERYEND_NOGAPS combined with Pair_trim_ends */
/* Old: Extend with BEST_LOCAL to get right local (not global) answer,
and with maxpeelback == 0 to ensure we perform no peelback */
@@ -11844,12 +11741,12 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
*cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogR,
+ maxpeelback,defect_rate,pairpool,dynprogR,
/*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
- pairs = trim_end5_exon_indels(&trim5p,*ambig_end_length_5,pairs,paired_favor_mode,zero_offset,querylength,
- watsonp,*cdna_direction,maxintronlen_bound);
+ pairs = trim_end5_exon_indels(&trim5p,*ambig_end_length_5,pairs,*cdna_direction);
+ if (trim5p == true) {
+ trimp = true;
+ }
}
if (trim3p == true) {
@@ -11862,13 +11759,13 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
knownsplice_limit_low,knownsplice_limit_high,
queryseq_ptr,queryuc_ptr,
*cdna_direction,watsonp,jump_late_p,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- defect_rate,pairpool,dynprogL,
+ maxpeelback,defect_rate,pairpool,dynprogL,
/*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
- path = trim_end3_exon_indels(&trim3p,*ambig_end_length_3,path,paired_favor_mode,zero_offset,querylength,
- watsonp,*cdna_direction,maxintronlen_bound);
+ path = trim_end3_exon_indels(&trim3p,*ambig_end_length_3,path,*cdna_direction);
pairs = List_reverse(path);
+ if (trim3p == true) {
+ trimp = true;
+ }
}
/* Important to end the alignment with Pair_trim_ends, or else trimming will be faulty */
@@ -11881,6 +11778,8 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
}
+ /* Cannot put trim_novel_spliceends here, which can generate an infinite loop in calling procedures */
+
debug3(printf("Final result of path_trim: chroffset = %u, cdna_direction %d, sensedir %d\n",
chroffset,*cdna_direction,*sensedir));
debug3(Pair_dump_list(pairs,true));
@@ -11899,7 +11798,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
double *ambig_prob_5, double *ambig_prob_3,
int *unknowns, int *mismatches, int *qopens, int *qindels, int *topens, int *tindels,
int *ncanonical, int *nsemicanonical, int *nnoncanonical, double *min_splice_prob,
- Stage2_T stage2,
+ List_T stage2pairs, List_T all_stage2_starts, List_T all_stage2_ends,
#ifdef PMAP
char *queryaaseq_ptr,
#endif
@@ -11908,23 +11807,15 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
bool watsonp, int genestrand, bool jump_late_p,
- int maxpeelback, int maxpeelback_distalmedial, int nullgap,
- int extramaterial_end, int extramaterial_paired,
- int extraband_single, int extraband_end, int extraband_paired, int minendexon,
- Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int ngap, bool diagnosticp, bool checkp,
- bool do_final_p, int sense_try, int sense_filter,
- Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen, int close_indels_mode,
- int paired_favor_mode, int zero_offset) {
+ int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ int sense_try, int sense_filter,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
struct Pair_T *pairarray;
- List_T stage2pairs, p;
- bool shortp, deletep, fwd_shiftp, fwd_incompletep, rev_shiftp, rev_incompletep;
+ List_T p;
Chrpos_T *last_genomedp5_fwd = NULL, *last_genomedp3_fwd = NULL, *last_genomedp5_rev = NULL, *last_genomedp3_rev = NULL;
- List_T pairs_pretrim, pairs_fwd, pairs_rev, best_pairs, temp_pairs, pairs, path_fwd, path_rev, best_path, temp_path, path;
+ List_T pairs_pretrim, pairs_fwd, pairs_rev, best_pairs, temp_pairs, path_fwd, path_rev, best_path, temp_path;
List_T copy;
List_T joined_ends, joined_starts;
- int alignment_score_fwd, alignment_score_rev;
int ncanonical_fwd, nsemicanonical_fwd, nnoncanonical_fwd,
ncanonical_rev, nsemicanonical_rev, nnoncanonical_rev;
int nbadintrons_fwd, nbadintrons_rev;
@@ -11942,7 +11833,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
#endif
- stage2pairs = Stage2_middle(stage2);
+ /* stage2pairs = Stage2_middle(stage2); */
debug0(printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u)\n",
chrnum,((Pair_T) stage2pairs->first)->genomepos));
debug(printf("Stage 3: *** Starting stage 3 at chrnum #%d, chrstart %u)\n",
@@ -11988,12 +11879,8 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,querylength,chrnum,chroffset,chrhigh,
- maxpeelback,nullgap,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen,close_indels_mode,
- paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
+ oligoindices_minor,diagpool,cellpool);
/* FREE(last_genomedp3_fwd); -- Do not free here, but at end */
/* FREE(last_genomedp5_fwd); -- Do not free here, but at end */
}
@@ -12013,12 +11900,8 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,querylength,chrnum,chroffset,chrhigh,
- maxpeelback,nullgap,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen,close_indels_mode,
- paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
+ oligoindices_minor,diagpool,cellpool);
/* FREE(last_genomedp5_rev); -- Do not free here, but at end */
/* FREE(last_genomedp3_rev); -- Do not free here, but at end */
}
@@ -12033,11 +11916,11 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
debug11(printf("Calling score_introns for path_fwd after path_compute_dir\n"));
pairs_fwd = score_introns(&max_intron_score_fwd,&avg_donor_score_fwd,&avg_acceptor_score_fwd,
&ncanonical_fwd,&nbadintrons_fwd,path_fwd,/*cdna_direction*/+1,watsonp,
- chrnum,chroffset,chrhigh,
+ chrnum,chroffset,chrhigh
#ifdef WASTE
- pairpool,
+ ,pairpool
#endif
- nullgap);
+ );
pairs_rev = assign_gap_types(path_rev,/*cdna_direction*/-1,watsonp,queryseq_ptr,
chrnum,chroffset,chrhigh,pairpool);
@@ -12045,11 +11928,11 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
debug11(printf("Calling score_introns for path_rev after path_compute_dir\n"));
pairs_rev = score_introns(&max_intron_score_rev,&avg_donor_score_rev,&avg_acceptor_score_rev,
&ncanonical_rev,&nbadintrons_rev,path_rev,/*cdna_direction*/-1,watsonp,
- chrnum,chroffset,chrhigh,
+ chrnum,chroffset,chrhigh
#ifdef WASTE
- pairpool,
+ ,pairpool
#endif
- nullgap);
+ );
if ((*cdna_direction = initial_cdna_direction(pairs_fwd,pairs_rev,
avg_donor_score_fwd,avg_acceptor_score_fwd,
@@ -12073,8 +11956,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
/* 2. 3' and 5' ends (possibly multiple) */
- debug(printf("Stage2 has %d starts and %d ends\n",
- List_length(Stage2_all_starts(stage2)),List_length(Stage2_all_ends(stage2))));
+ debug(printf("Stage2 has %d starts and %d ends\n",List_length(all_stage2_starts),List_length(all_stage2_ends)));
if (path_fwd == NULL) {
pairs_fwd = (List_T) NULL;
#ifdef DEBUG8
@@ -12083,22 +11965,17 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
#endif
} else {
/* 3' end */
- if (Stage2_all_ends(stage2) == NULL) {
+ if (all_stage2_ends == NULL) {
best_path = path_compute_end3(&fwd_ambig_end_length_3,&fwd_ambig_splicetype_3,&fwd_ambig_prob_3,
- defect_rate_fwd,path_fwd,/*cdna_direction*/+1,watsonp,genestrand,
+ defect_rate_fwd,path_fwd,/*cdna_direction*/+1,watsonp,
jump_late_p,querylength,
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,do_final_p,
- oligoindices_minor,diagpool,sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL);
} else {
best_path = Pairpool_remove_gapholders(path_fwd); /* Pairpool_join cannot handle gapholders */
joined_ends = (List_T) NULL;
- for (p = Stage2_all_ends(stage2); p != NULL; p = List_next(p)) {
+ for (p = all_stage2_ends; p != NULL; p = List_next(p)) {
#ifdef PMAP
copy = Pairpool_join_end3(/*path*/path_fwd,/*end3_pairs*/(List_T) List_head(p),pairpool,/*copy_end_p*/false);
#else
@@ -12120,27 +11997,18 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,querylength,chrnum,chroffset,chrhigh,
- maxpeelback,nullgap,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen,close_indels_mode,
- paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
+ oligoindices_minor,diagpool,cellpool);
temp_path = path_compute_end3(&temp_ambig_end_length,&temp_ambig_splicetype,&temp_ambig_prob,
- defect_rate_temp,path_fwd,/*cdna_direction*/+1,watsonp,genestrand,
+ defect_rate_temp,path_fwd,/*cdna_direction*/+1,watsonp,
jump_late_p,querylength,
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,do_final_p,
- oligoindices_minor,diagpool,sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL);
if (temp_path != NULL && end_compare(best_path,temp_path,/*cdna_direction*/+1,watsonp,
- chrnum,chroffset,chrhigh,nullgap,/*pairsp*/false) > 0) {
+ chrnum,chroffset,chrhigh,/*pairsp*/false) > 0) {
best_path = temp_path;
fwd_ambig_end_length_3 = temp_ambig_end_length;
fwd_ambig_splicetype_3 = temp_ambig_splicetype;
@@ -12156,22 +12024,16 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
/* 5' end */
pairs_fwd = List_reverse(best_path);
- if (Stage2_all_starts(stage2) == NULL) {
+ if (all_stage2_starts == NULL) {
best_pairs = path_compute_end5(&fwd_ambig_end_length_5,&fwd_ambig_splicetype_5,&fwd_ambig_prob_5,
- defect_rate_fwd,pairs_fwd,/*cdna_direction*/+1,watsonp,genestrand,
- jump_late_p,querylength,
+ defect_rate_fwd,pairs_fwd,/*cdna_direction*/+1,watsonp,jump_late_p,
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,do_final_p,
- oligoindices_minor,diagpool,sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogR);
} else {
best_pairs = Pairpool_remove_gapholders(pairs_fwd); /* Pairpool_join cannot handle gapholders */
joined_starts = (List_T) NULL;
- for (p = Stage2_all_starts(stage2); p != NULL; p = List_next(p)) {
+ for (p = all_stage2_starts; p != NULL; p = List_next(p)) {
#ifdef PMAP
copy = Pairpool_join_end5(/*pairs*/pairs_fwd,/*end5_path*/(List_T) List_head(p),pairpool,/*copy_end_p*/false);
#else
@@ -12193,26 +12055,17 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,querylength,chrnum,chroffset,chrhigh,
- maxpeelback,nullgap,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen,close_indels_mode,
- paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
+ oligoindices_minor,diagpool,cellpool);
temp_pairs = path_compute_end5(&temp_ambig_end_length,&temp_ambig_splicetype,&temp_ambig_prob,
defect_rate_temp,/*pairs*/List_reverse(path_fwd),
- /*cdna_direction*/+1,watsonp,genestrand,jump_late_p,querylength,
+ /*cdna_direction*/+1,watsonp,jump_late_p,
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,do_final_p,
- oligoindices_minor,diagpool,sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogR);
if (temp_pairs != NULL && end_compare(best_pairs,temp_pairs,/*cdna_direction*/+1,watsonp,
- chrnum,chroffset,chrhigh,nullgap,/*pairsp*/true) > 0) {
+ chrnum,chroffset,chrhigh,/*pairsp*/true) > 0) {
best_pairs = temp_pairs;
fwd_ambig_end_length_5 = temp_ambig_end_length;
fwd_ambig_splicetype_5 = temp_ambig_splicetype;
@@ -12239,23 +12092,18 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
#endif
} else {
/* 3' end */
- if (Stage2_all_ends(stage2) == NULL) {
+ if (all_stage2_ends == NULL) {
best_path = path_compute_end3(&rev_ambig_end_length_3,&rev_ambig_splicetype_3,&rev_ambig_prob_3,
- defect_rate_rev,path_rev,/*cdna_direction*/-1,watsonp,genestrand,
+ defect_rate_rev,path_rev,/*cdna_direction*/-1,watsonp,
jump_late_p,querylength,
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,do_final_p,
- oligoindices_minor,diagpool,sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL);
} else {
best_path = Pairpool_remove_gapholders(path_rev); /* Pairpool_join cannot handle gapholders */
joined_ends = (List_T) NULL;
- for (p = Stage2_all_ends(stage2); p != NULL; p = List_next(p)) {
+ for (p = all_stage2_ends; p != NULL; p = List_next(p)) {
copy = Pairpool_join_end3(/*path*/path_rev,/*end3_pairs*/(List_T) List_head(p),pairpool,/*copy_end_p*/false);
joined_ends = List_push(joined_ends,(void *) copy);
}
@@ -12268,27 +12116,18 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,querylength,chrnum,chroffset,chrhigh,
- maxpeelback,nullgap,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen,close_indels_mode,
- paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
+ oligoindices_minor,diagpool,cellpool);
temp_path = path_compute_end3(&temp_ambig_end_length,&temp_ambig_splicetype,&temp_ambig_prob,
- defect_rate_temp,path_rev,/*cdna_direction*/-1,watsonp,genestrand,
+ defect_rate_temp,path_rev,/*cdna_direction*/-1,watsonp,
jump_late_p,querylength,
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,do_final_p,
- oligoindices_minor,diagpool,sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL);
if (temp_path != NULL && end_compare(best_path,temp_path,/*cdna_direction*/-1,watsonp,
- chrnum,chroffset,chrhigh,nullgap,/*pairsp*/false) > 0) {
+ chrnum,chroffset,chrhigh,/*pairsp*/false) > 0) {
best_path = temp_path;
rev_ambig_end_length_3 = temp_ambig_end_length;
rev_ambig_splicetype_3 = temp_ambig_splicetype;
@@ -12304,23 +12143,17 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
/* 5' end */
pairs_rev = List_reverse(best_path);
- if (Stage2_all_starts(stage2) == NULL) {
+ if (all_stage2_starts == NULL) {
best_pairs = path_compute_end5(&rev_ambig_end_length_5,&rev_ambig_splicetype_5,&rev_ambig_prob_5,
- defect_rate_rev,pairs_rev,/*cdna_direction*/-1,watsonp,genestrand,
- jump_late_p,querylength,
+ defect_rate_rev,pairs_rev,/*cdna_direction*/-1,watsonp,jump_late_p,
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,do_final_p,
- oligoindices_minor,diagpool,sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogR);
} else {
best_pairs = Pairpool_remove_gapholders(pairs_rev); /* Pairpool_join cannot handle gapholders */
joined_starts = (List_T) NULL;
- for (p = Stage2_all_starts(stage2); p != NULL; p = List_next(p)) {
+ for (p = all_stage2_starts; p != NULL; p = List_next(p)) {
copy = Pairpool_join_end5(/*pairs*/pairs_rev,/*end5_path*/(List_T) List_head(p),pairpool,/*copy_end_p*/false);
joined_starts = List_push(joined_starts,(void *) copy);
}
@@ -12333,26 +12166,17 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,querylength,chrnum,chroffset,chrhigh,
- maxpeelback,nullgap,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen,close_indels_mode,
- paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
+ oligoindices_minor,diagpool,cellpool);
temp_pairs = path_compute_end5(&temp_ambig_end_length,&temp_ambig_splicetype,&temp_ambig_prob,
defect_rate_temp,/*pairs*/List_reverse(path_rev),
- /*cdna_direction*/-1,watsonp,genestrand,jump_late_p,querylength,
+ /*cdna_direction*/-1,watsonp,jump_late_p,
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,do_final_p,
- oligoindices_minor,diagpool,sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogR);
if (temp_pairs != NULL && end_compare(best_pairs,temp_pairs,/*cdna_direction*/-1,watsonp,
- chrnum,chroffset,chrhigh,nullgap,/*pairsp*/true) > 0) {
+ chrnum,chroffset,chrhigh,/*pairsp*/true) > 0) {
best_pairs = temp_pairs;
rev_ambig_end_length_5 = temp_ambig_end_length;
rev_ambig_splicetype_5 = temp_ambig_splicetype;
@@ -12390,14 +12214,8 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
- do_final_p,oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_fwd,last_genomedp3_fwd,
+ oligoindices_minor,diagpool,cellpool);
pairs_rev = path_compute_final(defect_rate_rev,pairs_rev,/*cdna_direction*/-1,
watsonp,genestrand,jump_late_p,querylength,
@@ -12405,14 +12223,8 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,chrnum,chroffset,chrhigh,
- knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
- do_final_p,oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,
- maxintronlen,close_indels_mode,paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,last_genomedp5_rev,last_genomedp3_rev,
+ oligoindices_minor,diagpool,cellpool);
#ifdef DEBUG8
}
#endif
@@ -12439,18 +12251,6 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
debug11(Pair_dump_list(pairs_rev,true));
debug11(printf("\n"));
- if (diagnosticp == true) {
- if (pairs_fwd != NULL) {
- path_fwd = check_gaps(pairs_fwd,pairpool);
- pairs_fwd = List_reverse(path_fwd);
- }
- if (pairs_rev != NULL) {
- path_rev = check_gaps(pairs_rev,pairpool);
- pairs_rev = List_reverse(path_rev);
- }
- }
-
-
debug(printf("Intronscores: %f,%f fwd, %f,%f rev\n",
avg_donor_score_fwd,avg_acceptor_score_fwd,avg_donor_score_rev,avg_acceptor_score_rev));
if (pairs_rev == NULL) {
@@ -12466,33 +12266,33 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
debug11(printf("Calling score_introns for path_fwd before path_trim\n"));
pairs_fwd = score_introns(&max_intron_score_fwd,&avg_donor_score_fwd,&avg_acceptor_score_fwd,
&ncanonical_fwd,&nbadintrons_fwd,path_fwd,/*cdna_direction*/+1,watsonp,
- chrnum,chroffset,chrhigh,
+ chrnum,chroffset,chrhigh
#ifdef WASTE
- pairpool,
+ ,pairpool
#endif
- nullgap);
- alignment_score_fwd = score_alignment(&nmatches_fwd,&nmismatches_fwd,&nindels_fwd,
+ );
+ /* alignment_score_fwd = */ score_alignment(&nmatches_fwd,&nmismatches_fwd,&nindels_fwd,
#ifdef COMPLEX_DIRECTION
- &indel_alignment_score_fwd,
+ &indel_alignment_score_fwd,
#endif
- &nsemicanonical_fwd,&nnoncanonical_fwd,
- pairs_fwd,/*cdna_direction*/+1);
+ &nsemicanonical_fwd,&nnoncanonical_fwd,
+ pairs_fwd,/*cdna_direction*/+1);
path_rev = List_reverse(pairs_rev);
debug11(printf("Calling score_introns for path_rev before path_trim\n"));
pairs_rev = score_introns(&max_intron_score_rev,&avg_donor_score_rev,&avg_acceptor_score_rev,
&ncanonical_rev,&nbadintrons_rev,path_rev,/*cdna_direction*/-1,watsonp,
- chrnum,chroffset,chrhigh,
+ chrnum,chroffset,chrhigh
#ifdef WASTE
- pairpool,
+ ,pairpool
#endif
- nullgap);
- alignment_score_rev = score_alignment(&nmatches_rev,&nmismatches_rev,&nindels_rev,
+ );
+ /* alignment_score_rev = */ score_alignment(&nmatches_rev,&nmismatches_rev,&nindels_rev,
#ifdef COMPLEX_DIRECTION
- &indel_alignment_score_rev,
+ &indel_alignment_score_rev,
#endif
- &nsemicanonical_rev,&nnoncanonical_rev,
- pairs_rev,/*cdna_direction*/-1);
+ &nsemicanonical_rev,&nnoncanonical_rev,
+ pairs_rev,/*cdna_direction*/-1);
pairs_pretrim = pick_cdna_direction(&(*cdna_direction),&(*sensedir),pairs_fwd,pairs_rev,
defect_rate_fwd,defect_rate_rev,
@@ -12504,7 +12304,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
nmatches_fwd,nmismatches_fwd,nmatches_rev,nmismatches_rev,nindels_fwd,nindels_rev,
indel_alignment_score_fwd,indel_alignment_score_rev,
#endif
- alignment_score_fwd,alignment_score_rev,sense_filter);
+ sense_filter);
}
if (splicingp == false) {
*sensedir = SENSE_NULL;
@@ -12545,12 +12345,14 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
*finalpairs = path_trim(defect_rate,&(*ambig_end_length_5),&(*ambig_end_length_3),
&(*ambig_splicetype_5),&(*ambig_splicetype_3),
&(*ambig_prob_5),&(*ambig_prob_3),
- pairs_pretrim,&(*cdna_direction),&(*sensedir),watsonp,
- jump_late_p,querylength,queryseq_ptr,queryuc_ptr,
- chrnum,chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
- maxpeelback,maxpeelback_distalmedial,nullgap,
- extramaterial_end,extraband_end,
- pairpool,dynprogL,dynprogR,maxintronlen,paired_favor_mode,zero_offset);
+ pairs_pretrim,&(*cdna_direction),watsonp,
+ jump_late_p,querylength,
+#ifdef GSNAP
+ &(*sensedir),
+#endif
+ queryseq_ptr,queryuc_ptr,
+ chroffset,chrhigh,knownsplice_limit_low,knownsplice_limit_high,
+ maxpeelback,pairpool,dynprogL,dynprogR);
#ifdef DEBUG8
}
#endif
@@ -12560,9 +12362,9 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
/* printf("ambig_end_length = %d, %d\n",*ambig_end_length_5,*ambig_end_length_3); */
- pairarray = make_pairarray(&(*npairs),&(*finalpairs),*cdna_direction,*sensedir,watsonp,
+ pairarray = make_pairarray(&(*npairs),&(*finalpairs),*cdna_direction,watsonp,
pairpool,queryseq_ptr,chroffset,chrhigh,
- ngap,query_subseq_offset,skiplength,diagnosticp);
+ ngap,query_subseq_offset,skiplength);
*goodness = Pair_fracidentity_array(&(*matches),&(*unknowns),&(*mismatches),
&(*qopens),&(*qindels),&(*topens),&(*tindels),
&(*ncanonical),&(*nsemicanonical),&(*nnoncanonical),
@@ -12596,7 +12398,7 @@ Stage3_compute (List_T *finalpairs, int *npairs, int *goodness, int *cdna_direct
bool
Stage3_mergeable (Stage3_T firstpart, Stage3_T secondpart,
- int breakpoint, int queryntlength, int maxintronlen_bound) {
+ int breakpoint, int queryntlength) {
Pair_T end1, start2;
bool watsonp, connectablep = false;
Chrpos_T endchrpos1, startchrpos2;
@@ -12643,7 +12445,7 @@ Stage3_mergeable (Stage3_T firstpart, Stage3_T secondpart,
/* Deletion */
/* *genomejump = startchrpos2 - endchrpos1 - 1; */
debug20(printf("endchrpos1 < startchrpos2, so deletion of length %u\n",startchrpos2 - endchrpos1 - 1));
- if (startchrpos2 < endchrpos1 + maxintronlen_bound) {
+ if (startchrpos2 < endchrpos1 + maxintronlen) {
connectablep = true;
}
@@ -12666,7 +12468,7 @@ Stage3_mergeable (Stage3_T firstpart, Stage3_T secondpart,
/* Deletion */
/* *genomejump = endchrpos1 - startchrpos2 - 1; */
debug20(printf("startchrpos2 < endchrpos1, so deletion of length %u\n",endchrpos1 - startchrpos2 - 1));
- if (endchrpos1 < startchrpos2 + maxintronlen_bound) {
+ if (endchrpos1 < startchrpos2 + maxintronlen) {
connectablep = true;
}
} else if (endchrpos1 + (end1->querypos - start2->querypos) + 100 >= startchrpos2) {
@@ -12702,9 +12504,7 @@ bool
Stage3_merge_chimera (T this_left, T this_right,
int minpos1, int maxpos1, int minpos2, int maxpos2,
char *queryseq_ptr, char *queryuc_ptr, Pairpool_T pairpool,
- Dynprog_T dynprogL, Dynprog_T dynprogR,
- int maxpeelback, int maxpeelback_distalmedial,
- int nullgap, int extramaterial_end, int extraband_end, int ngap) {
+ Dynprog_T dynprogL, Dynprog_T dynprogR, int maxpeelback) {
List_T path;
bool knownsplicep, chop_exon_p;
int ambig_end_length_5 = 0, ambig_end_length_3 = 0; /* Need to be set for build_pairs_end5 and build_path_end3 */
@@ -12738,9 +12538,7 @@ Stage3_merge_chimera (T this_left, T this_right,
queryseq_ptr,queryuc_ptr,
this_left->cdna_direction,this_left->watsonp,
/*jump_late_p*/this_left->watsonp ? false : true,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- /*defect_rate*/0.0,pairpool,dynprogL,
+ maxpeelback,/*defect_rate*/0.0,pairpool,dynprogL,
/*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
this_left->pairs = List_reverse(path);
@@ -12756,9 +12554,7 @@ Stage3_merge_chimera (T this_left, T this_right,
queryseq_ptr,queryuc_ptr,
this_right->cdna_direction,this_right->watsonp,
/*jump_late_p*/this_right->watsonp ? false : true,
- maxpeelback,maxpeelback_distalmedial,
- nullgap,extramaterial_end,extraband_end,
- /*defect_rate*/0.0,pairpool,dynprogR,
+ maxpeelback,/*defect_rate*/0.0,pairpool,dynprogR,
/*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
this_right->pairs = Pair_clip_bounded_list(this_right->pairs,minpos2,maxpos2);
@@ -12780,15 +12576,16 @@ void
Stage3_extend_right (T this, int goal, int querylength,
char *queryseq_ptr, char *queryuc_ptr,
bool max_extend_p, Pairpool_T pairpool,
- int ngap, int maxpeelback) {
+ int maxpeelback) {
List_T path, peeled_path;
- Pair_T leftpair, gappair;
+ Pair_T leftpair;
int nconsecutive_mismatches;
- int querypos, querydp5, genomedp5;
+ int querypos, querydp5;
+ Chrpos_T genomedp5;
int genomepos;
char c, c_upper, g, g_alt, comp;
- bool mismatchp, protectedp;
+ bool protectedp;
int n_peeled_indels;
int ncanonical, nsemicanonical;
@@ -12956,10 +12753,9 @@ Stage3_extend_right (T this, int goal, int querylength,
debug10(printf("END_LEFT AFTER FILL\n"));
Stage3_free_pairarray(&this);
- this->pairarray = make_pairarray(&this->npairs,&this->pairs,this->cdna_direction,this->sensedir,
+ this->pairarray = make_pairarray(&this->npairs,&this->pairs,this->cdna_direction,
this->watsonp,pairpool,queryseq_ptr,
- this->chroffset,this->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0,
- /*diagnosticp*/false);
+ this->chroffset,this->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0);
this->goodness = Pair_fracidentity_array(&this->matches,&this->unknowns,&this->mismatches,
&this->qopens,&this->qindels,&this->topens,&this->tindels,
&ncanonical,&nsemicanonical,&this->noncanonical,
@@ -12979,15 +12775,16 @@ void
Stage3_extend_left (T this, int goal,
char *queryseq_ptr, char *queryuc_ptr,
bool max_extend_p, Pairpool_T pairpool,
- int ngap, int maxpeelback) {
+ int maxpeelback) {
List_T pairs, peeled_pairs;
- Pair_T rightpair, gappair;
+ Pair_T rightpair;
int nconsecutive_mismatches;
- int querypos, querydp3, genomedp3;
+ int querypos, querydp3;
+ Chrpos_T genomedp3;
int genomepos;
char c, c_upper, g, g_alt, comp;
- bool mismatchp, protectedp;
+ bool protectedp;
int n_peeled_indels;
int ncanonical, nsemicanonical;
@@ -13153,10 +12950,9 @@ Stage3_extend_left (T this, int goal,
debug10(printf("END_RIGHT AFTER FILL\n"));
Stage3_free_pairarray(&this);
- this->pairarray = make_pairarray(&this->npairs,&this->pairs,this->cdna_direction,this->sensedir,
+ this->pairarray = make_pairarray(&this->npairs,&this->pairs,this->cdna_direction,
this->watsonp,pairpool,queryseq_ptr,
- this->chroffset,this->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0,
- /*diagnosticp*/false);
+ this->chroffset,this->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0);
this->goodness = Pair_fracidentity_array(&this->matches,&this->unknowns,&this->mismatches,
&this->qopens,&this->qindels,&this->topens,&this->tindels,
&ncanonical,&nsemicanonical,&this->noncanonical,
@@ -13191,11 +12987,11 @@ adjust_genomepos (T this, int delta) {
static bool
merge_local_single (T this_left, T this_right,
int minpos1, int maxpos1, int minpos2, int maxpos2,
- Sequence_T queryseq, char *queryseq_ptr, char *queryuc_ptr,
+ char *queryseq_ptr, char *queryuc_ptr,
Pairpool_T pairpool, Dynprog_T dynprogM,
- int maxpeelback, int extraband_single, int ngap) {
+ int maxpeelback) {
bool successp;
- Pair_T firstpair, lastpair, leftpair, rightpair;
+ Pair_T leftpair, rightpair;
List_T path;
bool watsonp, filledp;
@@ -13206,8 +13002,6 @@ merge_local_single (T this_left, T this_right,
#ifdef EXTRACT_GENOMICSEG
char *genomicseg_ptr = NULL;
#endif
- Univcoord_T left;
- int firstpos, lastpos;
int dynprogindex_minor = 0;
@@ -13241,14 +13035,12 @@ merge_local_single (T this_left, T this_right,
debug10(Pair_dump_list(this_right->pairs,true));
debug10(printf("END RIGHT\n"));
+#ifdef EXTRACT_GENOMICSEG
firstpair = (Pair_T) List_head(this_left->pairs);
lastpair = (Pair_T) List_last_value(this_right->pairs);
firstpos = firstpair->genomepos;
lastpos = lastpair->genomepos;
-
left = this_left->chroffset + firstpos;
-
-#ifdef EXTRACT_GENOMICSEG
genomicseg = Genome_get_segment(genome,left,genomiclength,/*chromosome_iit*/NULL,/*revcomp*/false);
genomicseg_ptr = genomicuc_ptr = Sequence_fullpointer(genomicseg);
#endif
@@ -13278,8 +13070,7 @@ merge_local_single (T this_left, T this_right,
queryseq_ptr,queryuc_ptr,/*querylength*/0,watsonp,
/*jump_late_p*/watsonp ? false : true,pairpool,dynprogM,
/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
- maxpeelback,extraband_single,/*defect_rate*/0,
- /*close_indels_mode*/+1,/*forcep*/false,/*finalp*/true)) == NULL) {
+ maxpeelback,/*defect_rate*/0,/*forcep*/false,/*finalp*/true)) == NULL) {
debug10(printf(" => failed\n"));
successp = false;
} else if (filledp == false) {
@@ -13308,14 +13099,12 @@ merge_local_single (T this_left, T this_right,
debug10(Pair_dump_list(this_right->pairs,true));
debug10(printf("END RIGHT\n"));
+#ifdef EXTRACT_GENOMICSEG
firstpair = (Pair_T) List_head(this_left->pairs);
lastpair = (Pair_T) List_last_value(this_right->pairs);
firstpos = firstpair->genomepos;
lastpos = lastpair->genomepos;
-
left = this_right->chroffset + lastpos;
-
-#ifdef EXTRACT_GENOMICSEG
genomicseg = Genome_get_segment(genome,left,genomiclength,/*chromosome_iit*/NULL,/*revcomp*/true);
genomicseg_ptr = genomicuc_ptr = Sequence_fullpointer(genomicseg);
#endif
@@ -13344,8 +13133,7 @@ merge_local_single (T this_left, T this_right,
queryseq_ptr,queryuc_ptr,/*querylength*/0,watsonp,
/*jump_late_p*/watsonp ? false : true,pairpool,dynprogM,
/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
- maxpeelback,extraband_single,/*defect_rate*/0,
- /*close_indels_mode*/+1,/*forcep*/false,/*finalp*/true)) == NULL) {
+ maxpeelback,/*defect_rate*/0,/*forcep*/false,/*finalp*/true)) == NULL) {
debug10(printf(" => failed\n"));
successp = false;
} else if (filledp == false) {
@@ -13359,19 +13147,17 @@ merge_local_single (T this_left, T this_right,
}
if (successp == false) {
- this_left->pairarray = make_pairarray(&this_left->npairs,&this_left->pairs,this_left->cdna_direction,this_left->sensedir,
+ this_left->pairarray = make_pairarray(&this_left->npairs,&this_left->pairs,this_left->cdna_direction,
this_left->watsonp,pairpool,queryseq_ptr,
- this_left->chroffset,this_left->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0,
- /*diagnosticp*/false);
+ this_left->chroffset,this_left->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0);
this_left->goodness = Pair_fracidentity_array(&this_left->matches,&this_left->unknowns,&this_left->mismatches,
&this_left->qopens,&this_left->qindels,&this_left->topens,&this_left->tindels,
&ncanonical,&nsemicanonical,&this_left->noncanonical,
&min_splice_prob,this_left->pairarray,this_left->npairs,this_left->cdna_direction);
- this_right->pairarray = make_pairarray(&this_right->npairs,&this_right->pairs,this_right->cdna_direction,this_right->sensedir,
+ this_right->pairarray = make_pairarray(&this_right->npairs,&this_right->pairs,this_right->cdna_direction,
this_right->watsonp,pairpool,queryseq_ptr,
- this_right->chroffset,this_right->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0,
- /*diagnosticp*/false);
+ this_right->chroffset,this_right->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0);
this_right->goodness = Pair_fracidentity_array(&this_right->matches,&this_right->unknowns,&this_right->mismatches,
&this_right->qopens,&this_right->qindels,&this_right->topens,&this_right->tindels,
&ncanonical,&nsemicanonical,&this_right->noncanonical,
@@ -13393,13 +13179,9 @@ recompute_for_cdna_direction (int *cdna_direction, List_T pairs, int genestrand,
char *queryseq_ptr, char *queryuc_ptr,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int maxpeelback, int nullgap,
- Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen_bound,
- int extramaterial_paired, int extraband_paired, int extraband_single,
- int paired_favor_mode, int zero_offset) {
+ int maxpeelback,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
List_T pairs_fwd, path_fwd, pairs_rev, path_rev, copy;
- int alignment_score_fwd, alignment_score_rev;
double max_intron_score_fwd = 0.0, max_intron_score_rev = 0.0,
avg_donor_score_fwd = 0.0, avg_acceptor_score_fwd = 0.0,
avg_donor_score_rev = 0.0, avg_acceptor_score_rev = 0.0;
@@ -13418,24 +13200,21 @@ recompute_for_cdna_direction (int *cdna_direction, List_T pairs, int genestrand,
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,/*querylength*/0,chrnum,chroffset,chrhigh,
- maxpeelback,nullgap,extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,/*close_indels_mode*/+1,
- paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
+ oligoindices_minor,diagpool,cellpool);
pairs_fwd = score_introns(&max_intron_score_fwd,&avg_donor_score_fwd,&avg_acceptor_score_fwd,
&ncanonical_fwd,&nbadintrons_fwd,path_fwd,/*cdna_direction*/+1,watsonp,
- chrnum,chroffset,chrhigh,
+ chrnum,chroffset,chrhigh
#ifdef WASTE
- pairpool,
+ ,pairpool
#endif
- nullgap);
- alignment_score_fwd = score_alignment(&nmatches_fwd,&nmismatches_fwd,&nindels_fwd,
+ );
+ /* alignment_score_fwd = */ score_alignment(&nmatches_fwd,&nmismatches_fwd,&nindels_fwd,
#ifdef COMPLEX_DIRECTION
- &indel_alignment_score_fwd,
+ &indel_alignment_score_fwd,
#endif
- &nsemicanonical_fwd,&nnoncanonical_fwd,
- pairs_fwd,/*cdna_direction*/+1);
+ &nsemicanonical_fwd,&nnoncanonical_fwd,
+ pairs_fwd,/*cdna_direction*/+1);
/* Compute rev */
@@ -13445,24 +13224,21 @@ recompute_for_cdna_direction (int *cdna_direction, List_T pairs, int genestrand,
queryaaseq_ptr,
#endif
queryseq_ptr,queryuc_ptr,/*querylength*/0,chrnum,chroffset,chrhigh,
- maxpeelback,nullgap,extramaterial_paired,extraband_single,extraband_paired,
- pairpool,dynprogL,dynprogM,dynprogR,/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,/*close_indels_mode*/+1,
- paired_favor_mode,zero_offset);
+ maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
+ oligoindices_minor,diagpool,cellpool);
pairs_rev = score_introns(&max_intron_score_rev,&avg_donor_score_rev,&avg_acceptor_score_rev,
&ncanonical_rev,&nbadintrons_rev,path_rev,/*cdna_direction*/-1,watsonp,
- chrnum,chroffset,chrhigh,
+ chrnum,chroffset,chrhigh
#ifdef WASTE
- pairpool,
+ ,pairpool
#endif
- nullgap);
- alignment_score_rev = score_alignment(&nmatches_rev,&nmismatches_rev,&nindels_rev,
+ );
+ /* alignment_score_rev = */ score_alignment(&nmatches_rev,&nmismatches_rev,&nindels_rev,
#ifdef COMPLEX_DIRECTION
- &indel_alignment_score_rev,
+ &indel_alignment_score_rev,
#endif
- &nsemicanonical_rev,&nnoncanonical_rev,
- pairs_rev,/*cdna_direction*/-1);
+ &nsemicanonical_rev,&nnoncanonical_rev,
+ pairs_rev,/*cdna_direction*/-1);
pairs = pick_cdna_direction(&(*cdna_direction),&sensedir,pairs_fwd,pairs_rev,
defect_rate_fwd,defect_rate_rev,
@@ -13474,7 +13250,7 @@ recompute_for_cdna_direction (int *cdna_direction, List_T pairs, int genestrand,
nmatches_fwd,nmismatches_fwd,nmatches_rev,nmismatches_rev,nindels_fwd,nindels_rev,
indel_alignment_score_fwd,indel_alignment_score_rev,
#endif
- alignment_score_fwd,alignment_score_rev,/*sense_filter*/0);
+ /*sense_filter*/0);
/* Don't know if we need to call path_compute_final */
@@ -13488,18 +13264,13 @@ Stage3_merge_local (T this_left, T this_right,
#ifdef PMAP
char *queryaaseq_ptr,
#endif
- Sequence_T queryseq, char *queryseq_ptr, char *queryuc_ptr,
+ char *queryseq_ptr, char *queryuc_ptr,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int maxpeelback, int nullgap,
- Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen_bound,
- int extramaterial_paired, int extraband_paired, int extraband_single, int ngap,
- int paired_favor_mode, int zero_offset) {
- Pair_T end1, start2, firstpair, lastpair, leftpair, rightpair;
+ int maxpeelback,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
+ Pair_T end1, start2, leftpair, rightpair;
List_T left_pairs, right_pairs, path;
bool watsonp, filledp, shiftp, incompletep;
- Univcoord_T left;
- int firstpos, lastpos;
int cdna_direction, cdna_direction_1, cdna_direction_2;
bool make_dir_consistent_p;
@@ -13545,6 +13316,7 @@ Stage3_merge_local (T this_left, T this_right,
watsonp = this_left->watsonp;
+#if 0
if (watsonp == true) {
debug10(printf("watsonp true\n"));
@@ -13552,7 +13324,6 @@ Stage3_merge_local (T this_left, T this_right,
lastpair = (Pair_T) List_last_value(this_right->pairs);
firstpos = firstpair->genomepos;
lastpos = lastpair->genomepos;
-
left = this_left->chroffset + firstpos;
} else {
@@ -13562,10 +13333,9 @@ Stage3_merge_local (T this_left, T this_right,
lastpair = (Pair_T) List_last_value(this_right->pairs);
firstpos = firstpair->genomepos;
lastpos = lastpair->genomepos;
-
left = this_right->chroffset + lastpos;
}
-
+#endif
/* Determine if need to make cdna_direction consistent */
end1 = Pair_end_bound(&cdna_direction_1,this_left->pairs,/*breakpoint*/maxpos1);
@@ -13625,11 +13395,8 @@ Stage3_merge_local (T this_left, T this_right,
recompute_for_cdna_direction(&cdna_direction,this_left->pairs,genestrand,watsonp,
queryseq_ptr,queryuc_ptr,
this_left->chrnum,this_left->chroffset,this_left->chrhigh,
- pairpool,dynprogL,dynprogM,dynprogR,maxpeelback,nullgap,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,
- extramaterial_paired,extraband_paired,extraband_single,
- paired_favor_mode,zero_offset);
+ pairpool,dynprogL,dynprogM,dynprogR,maxpeelback,
+ oligoindices_minor,diagpool,cellpool);
} else {
debug10(printf("traverse_genome_gap with cdna_direction %d...",cdna_direction));
@@ -13639,8 +13406,7 @@ Stage3_merge_local (T this_left, T this_right,
queryseq_ptr,queryuc_ptr,/*querylength*/0,cdna_direction,watsonp,
/*jump_late_p*/watsonp ? false : true,pairpool,
dynprogL,dynprogM,dynprogR,/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
- maxpeelback,extramaterial_paired,extraband_paired,extraband_single,
- /*defect_rate*/0,/*close_indels_mode*/+1,/*finalp*/true,/*simplep*/false);
+ maxpeelback,/*defect_rate*/0,/*finalp*/true,/*simplep*/false);
debug10(printf("done"));
if (filledp == false) {
@@ -13653,10 +13419,9 @@ Stage3_merge_local (T this_left, T this_right,
this_right->pairs = (List_T) NULL;
}
- if (make_pairarray_merge(this_left,cdna_direction,this_left->sensedir,
- this_left->watsonp,pairpool,queryseq_ptr,
+ if (make_pairarray_merge(this_left,cdna_direction,this_left->watsonp,pairpool,queryseq_ptr,
this_left->chroffset,this_left->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0,
- /*diagnosticp*/false,/*new_gap_p*/true) == false) {
+ /*new_gap_p*/true) == false) {
return false;
}
@@ -13681,11 +13446,8 @@ Stage3_merge_local (T this_left, T this_right,
recompute_for_cdna_direction(&cdna_direction,this_left->pairs,genestrand,watsonp,
queryseq_ptr,queryuc_ptr,
this_left->chrnum,this_left->chroffset,this_left->chrhigh,
- pairpool,dynprogL,dynprogM,dynprogR,maxpeelback,nullgap,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,
- extramaterial_paired,extraband_paired,extraband_single,
- paired_favor_mode,zero_offset);
+ pairpool,dynprogL,dynprogM,dynprogR,maxpeelback,
+ oligoindices_minor,diagpool,cellpool);
} else {
debug10(printf("traverse_cdna_gap..."));
@@ -13695,8 +13457,7 @@ Stage3_merge_local (T this_left, T this_right,
queryseq_ptr,queryuc_ptr,/*querylength*/0,cdna_direction,watsonp,
/*jump_late_p*/watsonp ? false : true,pairpool,
dynprogL,dynprogM,dynprogR,/*last_genomedp5*/NULL,/*last_genomedp3*/NULL,
- maxpeelback,extramaterial_paired,extraband_paired,extraband_single,
- /*defect_rate*/0,/*close_indels_mode*/+1,/*finalp*/true);
+ maxpeelback,/*defect_rate*/0,/*finalp*/true);
debug10(printf("done"));
if (filledp == false) {
@@ -13709,10 +13470,9 @@ Stage3_merge_local (T this_left, T this_right,
this_right->pairs = (List_T) NULL;
}
- if (make_pairarray_merge(this_left,cdna_direction,this_left->sensedir,
- this_left->watsonp,pairpool,queryseq_ptr,
+ if (make_pairarray_merge(this_left,cdna_direction,this_left->watsonp,pairpool,queryseq_ptr,
this_left->chroffset,this_left->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0,
- /*diagnosticp*/false,/*new_gap_p*/true) == false) {
+ /*new_gap_p*/true) == false) {
return false;
}
@@ -13730,8 +13490,8 @@ Stage3_merge_local (T this_left, T this_right,
if (merge_local_single(this_left,this_right,
minpos1,/*maxpos1*/end1->querypos,
/*minpos2*/start2->querypos,maxpos2,
- queryseq,queryseq_ptr,queryuc_ptr,
- pairpool,dynprogM,maxpeelback,extraband_single,ngap) == false) {
+ queryseq_ptr,queryuc_ptr,
+ pairpool,dynprogM,maxpeelback) == false) {
return false;
} else if (make_dir_consistent_p == true) {
@@ -13740,17 +13500,13 @@ Stage3_merge_local (T this_left, T this_right,
recompute_for_cdna_direction(&cdna_direction,this_left->pairs,genestrand,watsonp,
queryseq_ptr,queryuc_ptr,
this_left->chrnum,this_left->chroffset,this_left->chrhigh,
- pairpool,dynprogL,dynprogM,dynprogR,maxpeelback,nullgap,
- oligoindices_minor,diagpool,cellpool,
- sufflookback,nsufflookback,maxintronlen_bound,
- extramaterial_paired,extraband_paired,extraband_single,
- paired_favor_mode,zero_offset);
+ pairpool,dynprogL,dynprogM,dynprogR,maxpeelback,
+ oligoindices_minor,diagpool,cellpool);
}
- if (make_pairarray_merge(this_left,cdna_direction,this_left->sensedir,
- this_left->watsonp,pairpool,queryseq_ptr,
+ if (make_pairarray_merge(this_left,cdna_direction,this_left->watsonp,pairpool,queryseq_ptr,
this_left->chroffset,this_left->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0,
- /*diagnosticp*/false,/*new_gap_p*/false) == false) {
+ /*new_gap_p*/false) == false) {
return false;
}
diff --git a/src/stage3.h b/src/stage3.h
index 1ef1adc..6287f0c 100644
--- a/src/stage3.h
+++ b/src/stage3.h
@@ -1,4 +1,4 @@
-/* $Id: stage3.h 157977 2015-02-03 18:46:53Z twu $ */
+/* $Id: stage3.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef STAGE3_INCLUDED
#define STAGE3_INCLUDED
@@ -30,6 +30,8 @@ typedef struct Stage3_T *Stage3_T;
#else
#include "oligoindex_hr.h"
#endif
+#include "filestring.h"
+
#ifndef GSNAP
#include "gregion.h"
@@ -56,6 +58,10 @@ Stage3_setup (bool splicingp_in, bool novelsplicingp_in, bool require_splicedir_
int donor_typeint_in, int acceptor_typeint_in,
Univcoord_T *splicesites_in,
int min_intronlength_in, int max_deletionlength_in, int min_indel_end_matches_in,
+ int maxpeelback_distalmedial_in, int nullgap_in,
+ int extramaterial_end_in, int extramaterial_paired_in,
+ int extraband_single_in, int extraband_end_in, int extraband_paired_in,
+ int ngap_in, int maxintronlen_in,
bool output_sam_p_in, bool homopolymerp_in, Stage3debug_T stage3debug_in);
extern bool
@@ -171,7 +177,7 @@ Stage3_test_bounds (T this, int minpos, int maxpos);
extern void
Stage3_translate_cdna (T this, Sequence_T queryaaseq, bool strictp);
extern void
-Stage3_backtranslate_cdna (T this, bool diagnosticp);
+Stage3_backtranslate_cdna (T this);
#else
extern void
Stage3_translate_genomic (T this, int npairs, bool fulllengthp, int cds_startpos, int querylength,
@@ -184,73 +190,69 @@ Stage3_fix_cdna_direction (T this, T reference);
extern void
Stage3_translate (T this,
#ifdef PMAP
- Sequence_T queryseq, bool diagnosticp,
+ Sequence_T queryseq,
#endif
int querylength, bool fulllengthp,
- int cds_startpos, bool truncatep, bool strictp,
- bool maponlyp);
+ int cds_startpos, bool truncatep, bool strictp);
extern void
Stage3_translate_chimera (T this, T mate,
#ifdef PMAP
- Sequence_T queryseq, bool diagnosticp,
+ Sequence_T queryseq,
#endif
int querylength, bool fulllengthp,
- int cds_startpos, bool truncatep, bool strictp,
- bool maponlyp);
+ int cds_startpos, bool truncatep, bool strictp);
extern void
-Stage3_print_pathsummary (FILE *fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Univ_IIT_T contig_iit,
- IIT_T altstrain_iit, Sequence_T queryseq,
- char *dbversion, int maxmutations, bool diagnosticp, bool maponlyp);
+Stage3_print_pathsummary (Filestring_T fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Univ_IIT_T contig_iit,
+ IIT_T altstrain_iit, Sequence_T queryseq, char *dbversion, int maxmutations);
extern void
-Stage3_print_pslformat_nt (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T usersegment, Sequence_T queryseq);
+Stage3_print_pslformat_nt (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T usersegment, Sequence_T queryseq);
#ifdef PMAP
extern void
-Stage3_print_pslformat_pro (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T usersegment, Sequence_T queryseq, bool strictp);
+Stage3_print_pslformat_pro (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T usersegment, Sequence_T queryseq, bool strictp);
#endif
extern void
-Stage3_print_gff3 (FILE *fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
+Stage3_print_gff3 (Filestring_T fp, T this, int pathnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
Sequence_T queryseq, int querylength, Printtype_T printtype, char *sourcename);
#ifndef PMAP
extern void
-Stage3_print_sam (FILE *fp, char *abbrev, T this, int pathnum, int npaths,
+Stage3_print_sam (Filestring_T fp, char *abbrev, T this, int pathnum, int npaths,
int absmq_score, int first_absmq, int second_absmq, int mapq_score,
Univ_IIT_T chromosome_iit, Sequence_T usersegment,
Sequence_T queryseq, int chimera_part, Chimera_T chimera,
int quality_shift, bool sam_paired_p, char *sam_read_group_id);
#endif
extern void
-Stage3_print_iit_map (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq);
+Stage3_print_iit_map (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq);
extern void
-Stage3_print_iit_exon_map (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq);
+Stage3_print_iit_exon_map (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq);
extern void
-Stage3_print_splicesites (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq);
+Stage3_print_splicesites (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq);
extern void
-Stage3_print_introns (FILE *fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq);
+Stage3_print_introns (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, Sequence_T queryseq);
extern void
-Stage3_print_mutations (FILE *fp, T this, T reference, Univ_IIT_T chromosome_iit, Sequence_T queryseq,
- char *dbversion, bool showalignp, bool diagnosticp,
- int invertmode, bool nointronlenp, int wraplength,
- int maxmutations);
+Stage3_print_mutations (Filestring_T fp, T this, T reference, Univ_IIT_T chromosome_iit, Sequence_T queryseq,
+ char *dbversion, bool showalignp,
+ int invertmode, bool nointronlenp, int wraplength, int maxmutations);
extern void
-Stage3_print_map (FILE *fp, T this, IIT_T map_iit, int *map_divint_crosstable, Univ_IIT_T chromosome_iit,
+Stage3_print_map (Filestring_T fp, T this, IIT_T map_iit, int *map_divint_crosstable, Univ_IIT_T chromosome_iit,
int pathnum, bool map_exons_p, bool map_bothstrands_p, int nflanking, bool print_comment_p);
extern void
-Stage3_print_alignment (FILE *fp, T this, Genome_T genome,
+Stage3_print_alignment (Filestring_T fp, T this, Genome_T genome,
Univ_IIT_T chromosome_iit, Printtype_T printtype,
- bool continuousp, bool continuous_by_exon_p, bool diagnosticp, bool genomefirstp,
+ bool continuousp, bool continuous_by_exon_p, bool genomefirstp,
int invertmode, bool nointronlenp, int wraplength);
extern void
-Stage3_print_coordinates (FILE *fp, T this, Univ_IIT_T chromosome_iit, int invertmode);
+Stage3_print_coordinates (Filestring_T fp, T this, Univ_IIT_T chromosome_iit, int invertmode);
extern void
-Stage3_print_cdna (FILE *fp, T this, int wraplength);
+Stage3_print_cdna (Filestring_T fp, T this, int wraplength);
extern void
-Stage3_print_protein_genomic (FILE *fp, T this, int wraplength);
+Stage3_print_protein_genomic (Filestring_T fp, T this, int wraplength);
extern void
-Stage3_print_compressed (FILE *fp, T this, Sequence_T queryseq, Univ_IIT_T chromosome_iit,
+Stage3_print_compressed (Filestring_T fp, T this, Sequence_T queryseq, Univ_IIT_T chromosome_iit,
char *dbversion, Sequence_T usersegment, int pathnum, int npaths,
bool checksump, int chimerapos, int chimeraequivpos,
double donor_prob, double acceptor_prob, int chimera_cdna_direction);
@@ -280,7 +282,7 @@ Stage3_compute (List_T *pairs, int *npairs, int *goodness, int *cdna_direction,
double *ambig_prob_5, double *ambig_prob_3,
int *unknowns, int *mismatches, int *qopens, int *qindels, int *topens, int *tindels,
int *ncanonical, int *nsemicanonical, int *nnoncanonical, double *min_splice_prob,
- Stage2_T stage2,
+ List_T stage2pairs, List_T all_stage2_starts, List_T all_stage2_ends,
#ifdef PMAP
char *queryaaseq_ptr,
#endif
@@ -289,15 +291,10 @@ Stage3_compute (List_T *pairs, int *npairs, int *goodness, int *cdna_direction,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
Univcoord_T knownsplice_limit_low, Univcoord_T knownsplice_limit_high,
bool watsonp, int genestrand, bool jump_late_p,
- int maxpeelback, int maxpeelback_distalmedial, int nullgap,
- int extramaterial_end, int extramaterial_paired,
- int extraband_single, int extraband_end, int extraband_paired, int minendexon,
+ int maxpeelback,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int ngap, bool diagnosticp, bool checkp,
- bool do_final_p, int sense_try, int sense_filter,
- Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen, int close_indels_mode,
- int paired_favor_mode, int zero_offset);
+ int sense_try, int sense_filter,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool);
#ifndef GSNAP
extern T
@@ -312,25 +309,23 @@ Stage3_direct (Gregion_T gregion,
#endif
extern bool
-Stage3_mergeable (Stage3_T firstpart, Stage3_T secondpart,
- int exonexonpos, int queryntlength, int maxintronlen_bound);
+Stage3_mergeable (Stage3_T firstpart, Stage3_T secondpart, int exonexonpos, int queryntlength);
extern bool
Stage3_merge_chimera (T this_left, T this_right,
int minpos1, int maxpos1, int minpos2, int maxpos2,
char *queryseq_ptr, char *queryuc_ptr, Pairpool_T pairpool,
- Dynprog_T dynprogL, Dynprog_T dynprogR, int maxpeelback, int maxpeelback_distalmedial,
- int nullgap, int extramaterial_end, int extraband_end, int ngap);
+ Dynprog_T dynprogL, Dynprog_T dynprogR, int maxpeelback);
extern void
Stage3_extend_right (T this, int goal, int querylength,
char *queryseq_ptr, char *queryuc_ptr,
bool max_extend_p, Pairpool_T pairpool,
- int ngap, int maxpeelback);
+ int maxpeelback);
extern void
Stage3_extend_left (T this, int goal,
char *queryseq_ptr, char *queryuc_ptr,
bool max_extend_p, Pairpool_T pairpool,
- int ngap, int maxpeelback);
+ int maxpeelback);
extern bool
Stage3_merge_local (T this_left, T this_right,
@@ -338,13 +333,10 @@ Stage3_merge_local (T this_left, T this_right,
#ifdef PMAP
char *queryaaseq_ptr,
#endif
- Sequence_T queryseq, char *queryseq_ptr, char *queryuc_ptr,
+ char *queryseq_ptr, char *queryuc_ptr,
Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- int maxpeelback, int nullgap,
- Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
- int sufflookback, int nsufflookback, int maxintronlen_bound,
- int extramaterial_paired, int extraband_paired, int extraband_single, int ngap,
- int paired_favor_mode, int zero_offset);
+ int maxpeelback,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool);
#ifndef PMAP
extern void
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 0ea9073..b3ee14d 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 164703 2015-05-01 20:24:10Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 167162 2015-06-09 20:53:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -13,23 +13,35 @@ static char rcsid[] = "$Id: stage3hr.c 164703 2015-05-01 20:24:10Z twu $";
#include "assert.h"
#include "mem.h"
#include "chrnum.h"
-/* #include "complement.h" */
+#include "complement.h"
#include "interval.h"
#include "listdef.h"
#include "substring.h"
+#include "junction.h"
#include "genome128_hr.h"
#include "mapq.h"
#include "pair.h" /* For Pair_print_gsnap and Pair_compute_mapq */
+#include "comp.h" /* For Stage3end_run_gmap */
#include "maxent_hr.h"
#include "fastlog.h"
-#if 0
+/* Scores for amb_status_inside */
+#define AMB_RESOLVED_BYLENGTH 0
+#define AMB_RESOLVED_BYMATCHES 0
+#define AMB_NOT_AMBIGUOUS 1
+#define AMB_UNRESOLVED_MULTIPLE 2 /* Not so preferable, since none had the expected length */
+#define AMB_UNRESOLVED_TOOCLOSE 3 /* Worse than multiple, since no options work */
+
+
+
/* Originally added to avoid CIGAR strings like 1S99H, but results in
errors if first chromosome is circular. Now checking in samprint.c
- whether the CIGAR string is bad */
+ whether the CIGAR string is bad. But now needed again because we
+ are allowing alignments that are out of bounds. */
#define SOFT_CLIPS_AVOID_CIRCULARIZATION 1
-#endif
+
+#define TRANSLOC_SPECIAL 1 /* Eliminates translocations if non-translocations are found */
#define MAX_HITS 100000
@@ -46,7 +58,7 @@ static char rcsid[] = "$Id: stage3hr.c 164703 2015-05-01 20:24:10Z twu $";
/* #define TERMINAL_SECOND_CLASS 1 -- enabling this leads to poor results */
#define TERMINAL_COMPUTE_MINLENGTH 40
-#define SCORE_INDELS 1 /* Needed to compare genomic positions with and without indels */
+/* #define SCORE_INDELS 1 -- Needed to compare genomic positions with and without indels */
#define OUTERLENGTH_SLOP 100
@@ -160,7 +172,7 @@ static char rcsid[] = "$Id: stage3hr.c 164703 2015-05-01 20:24:10Z twu $";
#define debug12(x)
#endif
-/* Stage3pair_overlap */
+/* substring_gmap */
#ifdef DEBUG13
#define debug13(x) x
#else
@@ -174,6 +186,13 @@ static char rcsid[] = "$Id: stage3hr.c 164703 2015-05-01 20:24:10Z twu $";
#define debug14(x)
#endif
+/* Stage3pair_overlap */
+#ifdef DEBUG15
+#define debug15(x) x
+#else
+#define debug15(x)
+#endif
+
#define MAPQ_MAXIMUM_SCORE 40
@@ -182,6 +201,12 @@ static char rcsid[] = "$Id: stage3hr.c 164703 2015-05-01 20:24:10Z twu $";
static bool want_random_p;
static bool invert_first_p;
static bool invert_second_p;
+static Genome_T genome;
+
+static Univ_IIT_T chromosome_iit;
+static int nchromosomes;
+static int circular_typeint;
+
static IIT_T genes_iit;
static int *genes_divint_crosstable;
static IIT_T tally_iit;
@@ -192,15 +217,16 @@ static int *runlength_divint_crosstable;
static int reject_trimlength;
static int pairmax;
-#ifdef USE_BINGO
+#if 0
static int expected_pairlength;
static int pairlength_deviation;
-#endif
-
+#else
static int expected_pairlength_low;
static int expected_pairlength_high;
static int expected_pairlength_very_high;
+#endif
+static int amb_penalty = 2;
static int localsplicing_penalty;
static int indel_penalty_middle;
static int antistranded_penalty;
@@ -215,7 +241,6 @@ static bool merge_samechr_p;
static bool *circularp;
static char *failedinput_root;
-static bool fastq_format_p;
static bool print_m8_p;
@@ -226,7 +251,8 @@ static bool favor_ambiguous_p;
void
-Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in,
+Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome_in,
+ Univ_IIT_T chromosome_iit_in, int nchromosomes_in, int circular_typeint_in,
IIT_T genes_iit_in, int *genes_divint_crosstable_in,
IIT_T tally_iit_in, int *tally_divint_crosstable_in,
IIT_T runlength_iit_in, int *runlength_divint_crosstable_in,
@@ -236,10 +262,15 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in,
int antistranded_penalty_in, bool favor_multiexon_p_in,
int gmap_min_nconsecutive_in, int index1part,
int index1interval, bool novelsplicingp_in, bool merge_samechr_p_in,
- bool *circularp_in, char *failedinput_root_in, bool fastq_format_p_in,
+ bool *circularp_in, char *failedinput_root_in,
bool print_m8_p_in, bool want_random_p_in) {
invert_first_p = invert_first_p_in;
invert_second_p = invert_second_p_in;
+ genome = genome_in;
+
+ chromosome_iit = chromosome_iit_in;
+ nchromosomes = nchromosomes_in;
+ circular_typeint = circular_typeint_in;
genes_iit = genes_iit_in;
genes_divint_crosstable = genes_divint_crosstable_in;
tally_iit = tally_iit_in;
@@ -279,7 +310,6 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in,
circularp = circularp_in;
failedinput_root = failedinput_root_in;
- fastq_format_p = fastq_format_p_in;
print_m8_p = print_m8_p_in;
want_random_p = want_random_p_in;
@@ -288,135 +318,6 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in,
}
-static FILE *fp_failedinput_1;
-static FILE *fp_failedinput_2;
-
-static FILE *fp_nomapping;
-static FILE *fp_unpaired_uniq;
-static FILE *fp_unpaired_circular;
-static FILE *fp_unpaired_transloc;
-static FILE *fp_unpaired_mult;
-static FILE *fp_unpaired_mult_xs_1;
-static FILE *fp_unpaired_mult_xs_2;
-static FILE *fp_halfmapping_uniq;
-static FILE *fp_halfmapping_circular;
-static FILE *fp_halfmapping_transloc;
-static FILE *fp_halfmapping_mult;
-static FILE *fp_halfmapping_mult_xs_1;
-static FILE *fp_halfmapping_mult_xs_2;
-static FILE *fp_paired_uniq_circular;
-static FILE *fp_paired_uniq_inv;
-static FILE *fp_paired_uniq_scr;
-static FILE *fp_paired_uniq_long;
-static FILE *fp_paired_mult;
-static FILE *fp_paired_mult_xs_1;
-static FILE *fp_paired_mult_xs_2;
-static FILE *fp_concordant_uniq;
-static FILE *fp_concordant_circular;
-static FILE *fp_concordant_transloc;
-static FILE *fp_concordant_mult;
-static FILE *fp_concordant_mult_xs_1;
-static FILE *fp_concordant_mult_xs_2;
-
-
-void
-Stage3hr_file_setup_single (FILE *fp_failedinput_in, FILE *fp_nomapping_in,
- FILE *fp_unpaired_uniq_in, FILE *fp_unpaired_circular_in, FILE *fp_unpaired_transloc_in,
- FILE *fp_unpaired_mult_in, FILE *fp_unpaired_mult_xs_1_in) {
-
- fp_failedinput_1 = fp_failedinput_in;
-
- fp_nomapping = fp_nomapping_in;
- fp_unpaired_uniq = fp_unpaired_uniq_in;
- fp_unpaired_circular = fp_unpaired_circular_in;
- fp_unpaired_transloc = fp_unpaired_transloc_in;
- fp_unpaired_mult = fp_unpaired_mult_in;
- fp_unpaired_mult_xs_1 = fp_unpaired_mult_xs_1_in;
-
- return;
-}
-
-void
-Stage3hr_file_setup_paired (FILE *fp_failedinput_1_in, FILE *fp_failedinput_2_in, FILE *fp_nomapping_in,
- FILE *fp_halfmapping_uniq_in, FILE *fp_halfmapping_circular_in, FILE *fp_halfmapping_transloc_in,
- FILE *fp_halfmapping_mult_in, FILE *fp_halfmapping_mult_xs_1_in, FILE *fp_halfmapping_mult_xs_2_in,
- FILE *fp_paired_uniq_circular_in, FILE *fp_paired_uniq_inv_in, FILE *fp_paired_uniq_scr_in,
- FILE *fp_paired_uniq_long_in, FILE *fp_paired_mult_in, FILE *fp_paired_mult_xs_1_in, FILE *fp_paired_mult_xs_2_in,
- FILE *fp_concordant_uniq_in, FILE *fp_concordant_circular_in, FILE *fp_concordant_transloc_in,
- FILE *fp_concordant_mult_in, FILE *fp_concordant_mult_xs_1_in, FILE *fp_concordant_mult_xs_2_in) {
-
- fp_failedinput_1 = fp_failedinput_1_in;
- fp_failedinput_2 = fp_failedinput_2_in;
-
- fp_nomapping = fp_nomapping_in;
- fp_halfmapping_uniq = fp_halfmapping_uniq_in;
- fp_halfmapping_circular = fp_halfmapping_circular_in;
- fp_halfmapping_transloc = fp_halfmapping_transloc_in;
- fp_halfmapping_mult = fp_halfmapping_mult_in;
- fp_halfmapping_mult_xs_1 = fp_halfmapping_mult_xs_1_in;
- fp_halfmapping_mult_xs_2 = fp_halfmapping_mult_xs_2_in;
- fp_paired_uniq_circular = fp_paired_uniq_circular_in;
- fp_paired_uniq_inv = fp_paired_uniq_inv_in;
- fp_paired_uniq_scr = fp_paired_uniq_scr_in;
- fp_paired_uniq_long = fp_paired_uniq_long_in;
- fp_paired_mult = fp_paired_mult_in;
- fp_paired_mult_xs_1 = fp_paired_mult_xs_1_in;
- fp_paired_mult_xs_2 = fp_paired_mult_xs_2_in;
- fp_concordant_uniq = fp_concordant_uniq_in;
- fp_concordant_circular = fp_concordant_circular_in;
- fp_concordant_transloc = fp_concordant_transloc_in;
- fp_concordant_mult = fp_concordant_mult_in;
- fp_concordant_mult_xs_1 = fp_concordant_mult_xs_1_in;
- fp_concordant_mult_xs_2 = fp_concordant_mult_xs_2_in;
-
- return;
-}
-
-void
-Stage3hr_file_setup_all (FILE *fp_failedinput_1_in, FILE *fp_failedinput_2_in, FILE *fp_nomapping_in,
- FILE *fp_unpaired_uniq_in, FILE *fp_unpaired_circular_in, FILE *fp_unpaired_transloc_in,
- FILE *fp_unpaired_mult_in, FILE *fp_unpaired_mult_xs_1_in, FILE *fp_unpaired_mult_xs_2_in,
- FILE *fp_halfmapping_uniq_in, FILE *fp_halfmapping_circular_in, FILE *fp_halfmapping_transloc_in,
- FILE *fp_halfmapping_mult_in, FILE *fp_halfmapping_mult_xs_1_in, FILE *fp_halfmapping_mult_xs_2_in,
- FILE *fp_paired_uniq_circular_in, FILE *fp_paired_uniq_inv_in, FILE *fp_paired_uniq_scr_in,
- FILE *fp_paired_uniq_long_in, FILE *fp_paired_mult_in, FILE *fp_paired_mult_xs_1_in, FILE *fp_paired_mult_xs_2_in,
- FILE *fp_concordant_uniq_in, FILE *fp_concordant_circular_in, FILE *fp_concordant_transloc_in,
- FILE *fp_concordant_mult_in, FILE *fp_concordant_mult_xs_1_in, FILE *fp_concordant_mult_xs_2_in) {
-
- fp_failedinput_1 = fp_failedinput_1_in;
- fp_failedinput_2 = fp_failedinput_2_in;
-
- fp_nomapping = fp_nomapping_in;
- fp_unpaired_uniq = fp_unpaired_uniq_in;
- fp_unpaired_circular = fp_unpaired_circular_in;
- fp_unpaired_transloc = fp_unpaired_transloc_in;
- fp_unpaired_mult = fp_unpaired_mult_in;
- fp_unpaired_mult_xs_1 = fp_unpaired_mult_xs_1_in;
- fp_unpaired_mult_xs_2 = fp_unpaired_mult_xs_2_in;
- fp_halfmapping_uniq = fp_halfmapping_uniq_in;
- fp_halfmapping_circular = fp_halfmapping_circular_in;
- fp_halfmapping_transloc = fp_halfmapping_transloc_in;
- fp_halfmapping_mult = fp_halfmapping_mult_in;
- fp_halfmapping_mult_xs_1 = fp_halfmapping_mult_xs_1_in;
- fp_halfmapping_mult_xs_2 = fp_halfmapping_mult_xs_2_in;
- fp_paired_uniq_circular = fp_paired_uniq_circular_in;
- fp_paired_uniq_inv = fp_paired_uniq_inv_in;
- fp_paired_uniq_scr = fp_paired_uniq_scr_in;
- fp_paired_uniq_long = fp_paired_uniq_long_in;
- fp_paired_mult = fp_paired_mult_in;
- fp_paired_mult_xs_1 = fp_paired_mult_xs_1_in;
- fp_paired_mult_xs_2 = fp_paired_mult_xs_2_in;
- fp_concordant_uniq = fp_concordant_uniq_in;
- fp_concordant_circular = fp_concordant_circular_in;
- fp_concordant_transloc = fp_concordant_transloc_in;
- fp_concordant_mult = fp_concordant_mult_in;
- fp_concordant_mult_xs_1 = fp_concordant_mult_xs_1_in;
- fp_concordant_mult_xs_2 = fp_concordant_mult_xs_2_in;
-
- return;
-}
-
-
static char *
print_sense (int sense) {
@@ -439,6 +340,7 @@ struct T {
Hittype_T hittype;
int genestrand;
bool sarrayp; /* true if alignment found by suffix array */
+ GMAP_source_T gmap_source;
bool improved_by_gmap_p; /* true if GMAP alignment based on this hit is better */
Chrnum_T chrnum; /* Needed for printing paired-end results. A chrnum of 0 indicates a distant splice. */
@@ -448,7 +350,7 @@ struct T {
Univcoord_T chrhigh;
Chrpos_T chrlength;
- int querylength; /* Needed for overlap calculations */
+ int querylength; /* Needed for overlap and pairlength calculations */
int querylength_adj; /* Adjusted for insertions */
Univcoord_T genomicstart;
@@ -477,7 +379,9 @@ struct T {
bool trim_left_splicep;
bool trim_right_splicep;
+#if 0
int penalties; /* Indel penalties */
+#endif
int score_eventrim; /* Temporary storage used by Stage3end_optimal_score */
Overlap_T gene_overlap;
@@ -488,9 +392,6 @@ struct T {
int nmismatches_refdiff; /* Set only for display */
int nindels; /* for indels */
- int indel_pos; /* for indels. Relative to querypos 0 */
- int indel_low; /* for indels. Relative to chromosomal low end of read, but still 0 if no indel. */
- char *deletion; /* for deletions */
Chrpos_T distance; /* for splicing or shortexon (sum of two distances) */
Chrpos_T shortexonA_distance; /* for shortexon */
@@ -500,26 +401,24 @@ struct T {
int gmap_cdna_direction;
int gmap_nintrons;
int sensedir; /* for splicing */
- int sensedir_nonamb; /* for splicing */
+ int nsplices;
+
+#if 0
bool start_ambiguous_p;
bool end_ambiguous_p;
- int nchimera_known;
- int nchimera_novel;
-
- int start_amb_length; /* For splice, shortexon, and GMAP */
- int end_amb_length; /* For splice, shortexon, and GMAP */
int amb_length_donor; /* For shortexon only */
int amb_length_acceptor; /* For shortexon only */
-
- double start_amb_prob; /* For determining score_eventrim */
- double end_amb_prob; /* For determining score_eventrim */
double amb_prob_donor; /* For shortexon */
double amb_prob_acceptor; /* For shortexon */
+#endif
+ int gmap_start_amb_length; /* Needed because GMAP doesn't have substrings */
+ int gmap_end_amb_length; /* Needed because GMAP doesn't have substrings */
Endtype_T gmap_start_endtype; /* For GMAP, which has no substrings */
Endtype_T gmap_end_endtype; /* For GMAP, which has no substrings */
+#if 0
Univcoord_T *start_ambcoords; /* Pointer to either ambcoords_donor or ambcoords_acceptor */
Univcoord_T *end_ambcoords; /* Pointer to either ambcoords_donor or ambcoords_acceptor */
int start_nambcoords; /* Equal to either nambcoords_donor or nambcoords_acceptor */
@@ -529,7 +428,6 @@ struct T {
int nambcoords_donor;
int nambcoords_acceptor;
-
int *start_amb_knowni; /* Pointer to either amb_knowni_donor or amb_knowni_acceptor */
int *end_amb_knowni; /* Pointer to either amb_knowni_donor or amb_knowni_acceptor */
int *amb_knowni_donor;
@@ -544,31 +442,23 @@ struct T {
int *amb_nmismatches_acceptor;
double *amb_probs_donor;
double *amb_probs_acceptor;
-
-
- /* Single: substring1 */
- /* Indel: substring1 + substring2 */
- /* Halfsplice: substring1 */
- /* Splice: substring1 + substring2 */
- /* Shortexon: substring1 (shortexon) + substringD + substringA */
-
- /* Substrings should be in query order */
- Substring_T substring0;
- Substring_T substring1; /* Main substring */
- Substring_T substring2;
-
- Substring_T substring_donor; /* Just pointer to either substring1 or substring2 */
- Substring_T substring_acceptor; /* Just a pointer to either substring1 or substring2 */
- Substring_T substringD; /* Just a pointer to donor part of shortexon (substring0 or substring2) */
- Substring_T substringA; /* Just a pointer to acceptor part of shortexon (substring0 or substring2) */
+#endif
/* For GMAP alignment */
struct Pair_T *pairarray;
int npairs;
- int nsegments;
+ int nsegments; /* Used only for GSNAP output */
+ List_T cigar_tokens;
+ bool gmap_intronp;
+
+ List_T substrings_1toN; /* query position 1 to N */
+ List_T substrings_Nto1; /* query position N to 1. Keeps only pointers to the substrings. */
+ List_T substrings_LtoH; /* Chromosomal low-to-high. Keeps only pointers to the substrings. */
- List_T substring_LtoH; /* Chromosomal low-to-high, for computing chrpos */
+ List_T junctions_LtoH;
+ List_T junctions_1toN;
+ List_T junctions_Nto1;
bool paired_usedp;
bool paired_seenp; /* for paired-end. set to true by Stage3_pair_up(). */
@@ -594,7 +484,7 @@ struct Stage3pair_T {
int insertlength_expected_sign; /* 1 if in (expected_pairlength_low, expected_pairlength_high),
0 if in (expected_pairlength_low, expected_pairlength_very_high), and
-1 if < expected_pairlength_low or > expected_pairlength_very_high */
-
+
Chrpos_T outerlength;
float mapq_loglik;
@@ -604,7 +494,6 @@ struct Stage3pair_T {
int score;
int nmatches;
int nmatches_posttrim;
- int indel_low; /* For ranking identical indel alignments, so we pick lowest coord */
int score_eventrim;
@@ -620,14 +509,22 @@ struct Stage3pair_T {
int dir; /* -1, 0, or +1 */
bool sense_consistent_p;
- int nchimera_known;
- int nchimera_novel;
+ int nsplices;
bool circularp; /* If either hit5 or hit3 are circular */
+ int amb_resolve_5; /* Resolution of ambiguous end for this particular pair */
+ int amb_resolve_3; /* Resolution of ambiguous end for this particular pair */
+ int amb_status_inside;
};
+char *
+Stage3end_deletion_string (T this) {
+ abort();
+}
+
+
Hittype_T
Stage3end_hittype (T this) {
return this->hittype;
@@ -648,6 +545,7 @@ hittype_string (Hittype_T hittype) {
case ONE_THIRD_SHORTEXON: return "one-third-shortexon";
case TWO_THIRDS_SHORTEXON: return "two-thirds-shortexon";
case SHORTEXON: return "shortexon";
+ case SUBSTRINGS: return "substrings";
case GMAP: return "gmap";
case TERMINAL: return "terminal";
default: abort();
@@ -666,7 +564,12 @@ Stage3end_genestrand (T this) {
bool
Stage3end_sarrayp (T this) {
- return this->sarrayp;
+ if (this == NULL) {
+ /* Can happen if we call upon a mate in a halfmapping */
+ return false;
+ } else {
+ return this->sarrayp;
+ }
}
bool
@@ -695,6 +598,7 @@ Stage3end_anomalous_splice_p (T this) {
Chrnum_T
Stage3end_chrnum (T this) {
if (this == NULL) {
+ /* Can happen if we call upon a mate in a halfmapping */
return 0;
} else {
return this->chrnum;
@@ -704,6 +608,7 @@ Stage3end_chrnum (T this) {
Chrnum_T
Stage3end_effective_chrnum (T this) {
if (this == NULL) {
+ /* Can happen if we call upon a mate in a halfmapping */
return 0;
} else {
return this->effective_chrnum;
@@ -723,6 +628,7 @@ Stage3end_chrhigh (T this) {
Chrpos_T
Stage3end_chrlength (T this) {
if (this == NULL) {
+ /* Can happen if we call upon a mate in a halfmapping */
return 0;
} else {
return this->chrlength;
@@ -742,28 +648,43 @@ Stage3end_genomicend (T this) {
/* For Goby */
int
Stage3end_query_alignment_length (T this) {
- int length;
+ int length = 0;
+ List_T p;
+ Substring_T substring;
+ Junction_T junction;
- length = Substring_match_length(this->substring1);
- length += Substring_match_length(this->substring2);
- length += Substring_match_length(this->substring0);
- if (this->hittype == INSERTION) {
- length += this->nindels;
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ length += Substring_match_length(substring);
}
+ for (p = this->junctions_LtoH; p != NULL; p = List_next(p)) {
+ junction = (Junction_T) List_head(p);
+ if (Junction_type(junction) == INS_JUNCTION) {
+ length += Junction_nindels(junction);
+ }
+ }
+
return length;
}
-/* For Goby */
Chrpos_T
Stage3end_genomic_alignment_length (T this) {
- Chrpos_T length;
+ Chrpos_T length = 0;
+ List_T p;
+ Substring_T substring;
+ Junction_T junction;
- length = Substring_genomic_alignment_length(this->substring1);
- length += Substring_genomic_alignment_length(this->substring2);
- length += Substring_genomic_alignment_length(this->substring0);
- if (this->hittype == DELETION) {
- length += (Chrpos_T) this->nindels;
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ length += Substring_genomic_alignment_length(substring);
+ }
+ for (p = this->junctions_LtoH; p != NULL; p = List_next(p)) {
+ junction = (Junction_T) List_head(p);
+ if (Junction_type(junction) == DEL_JUNCTION) {
+ length += (Chrpos_T) Junction_nindels(junction);
+ }
}
+
return length;
}
@@ -772,7 +693,7 @@ Chrpos_T
Stage3end_chrpos_low_trim (T this) {
Substring_T substring_low;
- substring_low = (Substring_T) List_head(this->substring_LtoH);
+ substring_low = (Substring_T) List_head(this->substrings_LtoH);
if (this->plusp == true) {
return Substring_alignstart_trim(substring_low) - Substring_chroffset(substring_low);
} else {
@@ -879,13 +800,19 @@ Stage3end_nmismatches_refdiff (T this) {
/* Called only for terminals */
Endtype_T
Stage3end_start_endtype (T this) {
- return Substring_start_endtype(this->substring1);
+ Substring_T substring;
+
+ substring = (Substring_T) List_head(this->substrings_LtoH);
+ return Substring_start_endtype(substring);
}
/* Called only for terminals */
Endtype_T
Stage3end_end_endtype (T this) {
- return Substring_end_endtype(this->substring1);
+ Substring_T substring;
+
+ substring = (Substring_T) List_head(this->substrings_LtoH);
+ return Substring_end_endtype(substring);
}
Endtype_T
@@ -904,11 +831,10 @@ Stage3end_nindels (T this) {
}
int
-Stage3end_indel_pos (T this) {
- return this->indel_pos;
+Stage3end_querylength (T this) {
+ return this->querylength;
}
-
bool
Stage3end_plusp (T this) {
return this->plusp;
@@ -931,14 +857,32 @@ Stage3end_trim_right (T this) {
return this->trim_right;
}
+static int
+start_amb_length (T this) {
+ if (this->hittype == GMAP) {
+ return this->gmap_start_amb_length;
+ } else {
+ return Substring_match_length_amb((Substring_T) List_head(this->substrings_1toN));
+ }
+}
+
+static int
+end_amb_length (T this) {
+ if (this->hittype == GMAP) {
+ return this->gmap_end_amb_length;
+ } else {
+ return Substring_match_length_amb((Substring_T) List_head(this->substrings_Nto1));
+ }
+}
+
int
Stage3end_trim_left_raw (T this) {
- return this->trim_left + this->start_amb_length;
+ return this->trim_left + start_amb_length(this);
}
int
Stage3end_trim_right_raw (T this) {
- return this->trim_right + this->end_amb_length;
+ return this->trim_right + end_amb_length(this);
}
int
@@ -947,136 +891,168 @@ Stage3end_circularpos (T this) {
}
+Junction_T
+Stage3end_junctionD (T this) {
+ if (this->sensedir == SENSE_ANTI) {
+ return (Junction_T) List_head(this->junctions_Nto1);
+ } else {
+ return (Junction_T) List_head(this->junctions_1toN);
+ }
+}
+
+Junction_T
+Stage3end_junctionA (T this) {
+ if (this->sensedir == SENSE_ANTI) {
+ return (Junction_T) List_head(this->junctions_1toN);
+ } else {
+ return (Junction_T) List_head(this->junctions_Nto1);
+ }
+}
+
+List_T
+Stage3end_substrings_LtoH (T this) {
+ return this->substrings_LtoH;
+}
+
+List_T
+Stage3end_junctions_LtoH (T this) {
+ return this->junctions_LtoH;
+}
+
+
+/* Called only by samprint currently */
Substring_T
Stage3end_substring1 (T this) {
- return this->substring1;
+ return (Substring_T) List_head(this->substrings_1toN);
}
+/* Called only by samprint currently */
Substring_T
Stage3end_substring2 (T this) {
- return this->substring2;
+ return (Substring_T) List_head(this->substrings_Nto1);
}
-char *
-Stage3end_deletion_string (T this) {
- return this->deletion;
-}
Substring_T
Stage3end_substring_donor (T this) {
- assert(this->hittype == SPLICE || this->hittype == SAMECHR_SPLICE || this->hittype == TRANSLOC_SPLICE ||
- this->hittype == HALFSPLICE_DONOR || this->hittype == HALFSPLICE_ACCEPTOR);
- return this->substring_donor;
+ if (this->sensedir == SENSE_ANTI) {
+ return (Substring_T) List_head(this->substrings_Nto1);
+ } else if (this->sensedir == SENSE_FORWARD) {
+ return (Substring_T) List_head(this->substrings_1toN);
+ } else {
+ abort();
+ }
}
Substring_T
Stage3end_substring_acceptor (T this) {
- assert(this->hittype == SPLICE || this->hittype == SAMECHR_SPLICE || this->hittype == TRANSLOC_SPLICE ||
- this->hittype == HALFSPLICE_DONOR || this->hittype == HALFSPLICE_ACCEPTOR);
- return this->substring_acceptor;
+ if (this->sensedir == SENSE_ANTI) {
+ return (Substring_T) List_head(this->substrings_1toN);
+ } else if (this->sensedir == SENSE_FORWARD) {
+ return (Substring_T) List_head(this->substrings_Nto1);
+ } else {
+ abort();
+ }
}
+/* Now same as Stage3end_substring_donor */
Substring_T
Stage3end_substringD (T this) {
- assert(this->hittype == SHORTEXON || this->hittype == ONE_THIRD_SHORTEXON || this->hittype == TWO_THIRDS_SHORTEXON);
- return this->substringD;
+ if (this->sensedir == SENSE_ANTI) {
+ return (Substring_T) List_head(this->substrings_Nto1);
+ } else {
+ return (Substring_T) List_head(this->substrings_1toN);
+ }
}
+/* Now same as Stage3end_substring_acceptor */
Substring_T
Stage3end_substringA (T this) {
- assert(this->hittype == SHORTEXON || this->hittype == ONE_THIRD_SHORTEXON || this->hittype == TWO_THIRDS_SHORTEXON);
- return this->substringA;
+ if (this->sensedir == SENSE_ANTI) {
+ return (Substring_T) List_head(this->substrings_1toN);
+ } else {
+ return (Substring_T) List_head(this->substrings_Nto1);
+ }
+}
+
+
+Substring_T
+Stage3end_substringS (T this) {
+ return (Substring_T) List_head(List_next(this->substrings_1toN));
}
+
+/* Same logic as in print_substrings in samprint.c to get the first substring for CIGAR or MD string */
Substring_T
-Stage3end_substring_low (T this, int trim_low) {
+Stage3end_substring_low (T this, int hardclip_low) {
List_T p;
- Substring_T substring;
if (this == NULL) {
return (Substring_T) NULL;
} else if (this->plusp == true) {
- p = this->substring_LtoH;
-#ifdef DEBUG13
- if (p != NULL) {
- printf("Substring is %d..%d against trim_low %d\n",
- Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
- trim_low);
+ p = this->substrings_LtoH;
+ if (Substring_ambiguous_p((Substring_T) List_head(p)) == true) {
+ p = List_next(p);
}
-#endif
- while (p != NULL && Substring_queryend((Substring_T) List_head(p)) < trim_low) {
+ while (p != NULL && Substring_queryend((Substring_T) List_head(p)) <= hardclip_low) {
+ debug15(printf("Plus: Skippping substring %d..%d against hardclip_low %d\n",
+ Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
+ hardclip_low));
p = List_next(p);
-#ifdef DEBUG13
- if (p != NULL) {
- printf("Substring is %d..%d against trim_low %d\n",
- Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
- trim_low);
- }
-#endif
}
assert(p != NULL);
if (p == NULL) {
return (Substring_T) NULL;
} else {
+ debug15(printf("Plus: Returning substring %d..%d against hardclip_low %d\n",
+ Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
+ hardclip_low));
return (Substring_T) List_head(p);
}
} else {
-#ifdef DEBUG13
- for (p = this->substring_LtoH; p != NULL; p = List_next(p)) {
+#ifdef DEBUG15
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
printf("LtoH: %d..%d\n",
Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)));
}
#endif
- p = this->substring_LtoH;
-#ifdef DEBUG13
- if (p != NULL) {
- printf("Substring is %d..%d against %d = querylength %d - trim_low %d\n",
- Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
- this->querylength_adj - trim_low,this->querylength_adj,trim_low);
+ p = this->substrings_LtoH;
+ if (Substring_ambiguous_p((Substring_T) List_head(p)) == true) {
+ p = List_next(p);
}
-#endif
- while (p != NULL && Substring_querystart((Substring_T) List_head(p)) > this->querylength_adj - trim_low) {
+
+ while (p != NULL && Substring_querystart((Substring_T) List_head(p)) >= this->querylength - hardclip_low) {
+ debug15(printf("Minus: Skipping substring %d..%d against %d = querylength %d - hardclip_low %d\n",
+ Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
+ this->querylength - hardclip_low,this->querylength,hardclip_low));
p = List_next(p);
-#ifdef DEBUG13
- if (p != NULL) {
- printf("Substring is %d..%d against %d = querylength %d - trim_low %d\n",
- Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
- this->querylength_adj - trim_low,this->querylength_adj,trim_low);
- }
-#endif
}
assert(p != NULL);
if (p == NULL) {
return (Substring_T) NULL;
} else {
+ debug15(printf("Minus: Returning substring %d..%d against %d = querylength %d - hardclip_low %d\n",
+ Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
+ this->querylength - hardclip_low,this->querylength,hardclip_low));
return (Substring_T) List_head(p);
}
}
}
-
-Substring_T
-Stage3end_substring_high (T this) {
- if (this == NULL) {
- return (Substring_T) NULL;
- } else {
- return (Substring_T) List_last_value(this->substring_LtoH);
- }
-}
Substring_T
Stage3end_substring_containing (T this, int querypos) {
- if (Substring_contains_p(this->substring1,querypos) == true) {
- return this->substring1;
- }
- if (this->substring2 != NULL && Substring_contains_p(this->substring2,querypos) == true) {
- return this->substring2;
- }
- if (this->substring0 != NULL && Substring_contains_p(this->substring0,querypos) == true) {
- return this->substring0;
+ Substring_T substring;
+ List_T p;
+
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_contains_p(substring,querypos) == true) {
+ return substring;
+ }
}
return (Substring_T) NULL;
}
@@ -1093,6 +1069,16 @@ Stage3end_npairs (T this) {
return this->npairs;
}
+List_T
+Stage3end_cigar_tokens (T this) {
+ return this->cigar_tokens;
+}
+
+bool
+Stage3end_gmap_intronp (T this) {
+ return this->gmap_intronp;
+}
+
Chrpos_T
Stage3end_distance (T this) {
@@ -1111,23 +1097,64 @@ Stage3end_shortexonD_distance (T this) {
double
Stage3end_chimera_prob (T this) {
- return Substring_chimera_prob(this->substring_donor) + Substring_chimera_prob(this->substring_acceptor);
+ List_T p;
+ Junction_T junction;
+
+ for (p = this->junctions_1toN; p != NULL; p = List_next(p)) {
+ junction = (Junction_T) List_head(p);
+ if (Junction_type(junction) == CHIMERA_JUNCTION) {
+ return Junction_prob(junction);
+ }
+ }
+
+ return 0.0;
}
double
Stage3end_shortexon_prob (T this) {
- return Substring_chimera_prob(this->substringD) +
- + Substring_chimera_prob(this->substring1) + Substring_chimera_prob_2(this->substring1) +
- Substring_chimera_prob(this->substringA);
+ double prob = 0.0;
+ List_T p;
+ Junction_T junction;
+
+ for (p = this->junctions_LtoH; p != NULL; p = List_next(p)) {
+ junction = (Junction_T) List_head(p);
+ prob += Junction_prob(junction);
+ }
+
+ return prob;
+}
+
+static double
+Stage3end_prob (T this) {
+ double prob = 0.0;
+ List_T p;
+ Junction_T junction;
+
+ for (p = this->junctions_LtoH; p != NULL; p = List_next(p)) {
+ junction = (Junction_T) List_head(p);
+ prob += Junction_prob(junction);
+ }
+
+ return prob;
}
+/* Should eventually look for substrings adjacent to the chimeric junction */
Univcoord_T
Stage3end_chimera_segmenti_left (T this) {
Univcoord_T x_segmenti, x_segmentj;
+ Substring_T substring_donor, substring_acceptor;
+
+ if (this->sensedir == SENSE_ANTI) {
+ substring_donor = (Substring_T) List_head(this->substrings_Nto1);
+ substring_acceptor = (Substring_T) List_head(this->substrings_1toN);
+ } else {
+ substring_donor = (Substring_T) List_head(this->substrings_1toN);
+ substring_acceptor = (Substring_T) List_head(this->substrings_Nto1);
+ }
- x_segmenti = Substring_left_genomicseg(this->substring_donor);
- x_segmentj = Substring_left_genomicseg(this->substring_acceptor);
+ x_segmenti = Substring_left_genomicseg(substring_donor);
+ x_segmentj = Substring_left_genomicseg(substring_acceptor);
if (x_segmenti < x_segmentj) {
return x_segmenti;
} else {
@@ -1135,12 +1162,22 @@ Stage3end_chimera_segmenti_left (T this) {
}
}
+/* Should eventually look for substrings adjacent to the chimeric junction */
Univcoord_T
Stage3end_chimera_segmentj_left (T this) {
Univcoord_T x_segmenti, x_segmentj;
+ Substring_T substring_donor, substring_acceptor;
+
+ if (this->sensedir == SENSE_ANTI) {
+ substring_donor = (Substring_T) List_head(this->substrings_Nto1);
+ substring_acceptor = (Substring_T) List_head(this->substrings_1toN);
+ } else {
+ substring_donor = (Substring_T) List_head(this->substrings_1toN);
+ substring_acceptor = (Substring_T) List_head(this->substrings_Nto1);
+ }
- x_segmenti = Substring_left_genomicseg(this->substring_donor);
- x_segmentj = Substring_left_genomicseg(this->substring_acceptor);
+ x_segmenti = Substring_left_genomicseg(substring_donor);
+ x_segmentj = Substring_left_genomicseg(substring_acceptor);
if (x_segmenti > x_segmentj) {
return x_segmenti;
} else {
@@ -1154,17 +1191,35 @@ Stage3end_chimera_segmenti_cmp (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
Univcoord_T x_segmenti, x_segmentj, y_segmenti, y_segmentj, temp;
+ Substring_T x_substring_donor, x_substring_acceptor,
+ y_substring_donor, y_substring_acceptor;
- x_segmenti = Substring_left_genomicseg(x->substring_donor);
- x_segmentj = Substring_left_genomicseg(x->substring_acceptor);
+ if (x->sensedir == SENSE_ANTI) {
+ x_substring_donor = (Substring_T) List_head(x->substrings_Nto1);
+ x_substring_acceptor = (Substring_T) List_head(x->substrings_1toN);
+ } else {
+ x_substring_donor = (Substring_T) List_head(x->substrings_1toN);
+ x_substring_acceptor = (Substring_T) List_head(x->substrings_Nto1);
+ }
+
+ if (y->sensedir == SENSE_ANTI) {
+ y_substring_donor = (Substring_T) List_head(y->substrings_Nto1);
+ y_substring_acceptor = (Substring_T) List_head(y->substrings_1toN);
+ } else {
+ y_substring_donor = (Substring_T) List_head(y->substrings_1toN);
+ y_substring_acceptor = (Substring_T) List_head(y->substrings_Nto1);
+ }
+
+ x_segmenti = Substring_left_genomicseg(x_substring_donor);
+ x_segmentj = Substring_left_genomicseg(x_substring_acceptor);
if (x_segmentj < x_segmenti) {
temp = x_segmentj;
x_segmentj = x_segmenti;
x_segmenti = temp;
}
- y_segmenti = Substring_left_genomicseg(y->substring_donor);
- y_segmentj = Substring_left_genomicseg(y->substring_acceptor);
+ y_segmenti = Substring_left_genomicseg(y_substring_donor);
+ y_segmentj = Substring_left_genomicseg(y_substring_acceptor);
if (y_segmentj < y_segmenti) {
temp = y_segmentj;
y_segmentj = y_segmenti;
@@ -1191,17 +1246,36 @@ Stage3end_chimera_segmentj_cmp (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
Univcoord_T x_segmenti, x_segmentj, y_segmenti, y_segmentj, temp;
+ Substring_T x_substring_donor, x_substring_acceptor,
+ y_substring_donor, y_substring_acceptor;
+
+ if (x->sensedir == SENSE_ANTI) {
+ x_substring_donor = (Substring_T) List_head(x->substrings_Nto1);
+ x_substring_acceptor = (Substring_T) List_head(x->substrings_1toN);
+ } else {
+ x_substring_donor = (Substring_T) List_head(x->substrings_1toN);
+ x_substring_acceptor = (Substring_T) List_head(x->substrings_Nto1);
+ }
+
+ if (y->sensedir == SENSE_ANTI) {
+ y_substring_donor = (Substring_T) List_head(y->substrings_Nto1);
+ y_substring_acceptor = (Substring_T) List_head(y->substrings_1toN);
+ } else {
+ y_substring_donor = (Substring_T) List_head(y->substrings_1toN);
+ y_substring_acceptor = (Substring_T) List_head(y->substrings_Nto1);
+ }
- x_segmenti = Substring_left_genomicseg(x->substring_donor);
- x_segmentj = Substring_left_genomicseg(x->substring_acceptor);
+
+ x_segmenti = Substring_left_genomicseg(x_substring_donor);
+ x_segmentj = Substring_left_genomicseg(x_substring_acceptor);
if (x_segmentj < x_segmenti) {
temp = x_segmentj;
x_segmentj = x_segmenti;
x_segmenti = temp;
}
- y_segmenti = Substring_left_genomicseg(y->substring_donor);
- y_segmentj = Substring_left_genomicseg(y->substring_acceptor);
+ y_segmenti = Substring_left_genomicseg(y_substring_donor);
+ y_segmentj = Substring_left_genomicseg(y_substring_acceptor);
if (y_segmentj < y_segmenti) {
temp = y_segmentj;
y_segmentj = y_segmenti;
@@ -1227,9 +1301,23 @@ Stage3end_shortexon_substringD_cmp (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
Univcoord_T x_left, y_left;
+ Substring_T x_substring_donor, y_substring_donor;
+
+ if (x->sensedir == SENSE_ANTI) {
+ x_substring_donor = (Substring_T) List_head(x->substrings_Nto1);
+ } else {
+ x_substring_donor = (Substring_T) List_head(x->substrings_1toN);
+ }
- x_left = Substring_left_genomicseg(x->substringD);
- y_left = Substring_left_genomicseg(y->substringD);
+ if (y->sensedir == SENSE_ANTI) {
+ y_substring_donor = (Substring_T) List_head(y->substrings_Nto1);
+ } else {
+ y_substring_donor = (Substring_T) List_head(y->substrings_1toN);
+ }
+
+
+ x_left = Substring_left_genomicseg(x_substring_donor);
+ y_left = Substring_left_genomicseg(y_substring_donor);
if (x_left < y_left) {
return -1;
} else if (y_left < x_left) {
@@ -1244,9 +1332,23 @@ Stage3end_shortexon_substringA_cmp (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
Univcoord_T x_left, y_left;
+ Substring_T x_substring_acceptor, y_substring_acceptor;
+
+ if (x->sensedir == SENSE_ANTI) {
+ x_substring_acceptor = (Substring_T) List_head(x->substrings_1toN);
+ } else {
+ x_substring_acceptor = (Substring_T) List_head(x->substrings_Nto1);
+ }
+
+ if (y->sensedir == SENSE_ANTI) {
+ y_substring_acceptor = (Substring_T) List_head(y->substrings_1toN);
+ } else {
+ y_substring_acceptor = (Substring_T) List_head(y->substrings_Nto1);
+ }
- x_left = Substring_left_genomicseg(x->substringA);
- y_left = Substring_left_genomicseg(y->substringA);
+
+ x_left = Substring_left_genomicseg(x_substring_acceptor);
+ y_left = Substring_left_genomicseg(y_substring_acceptor);
if (x_left < y_left) {
return -1;
} else if (y_left < x_left) {
@@ -1261,13 +1363,14 @@ Stage3end_shortexon_substringA_cmp (const void *a, const void *b) {
int
Stage3end_sensedir (T this) {
- return this->sensedir;
+ if (this == NULL) {
+ /* Can happen if we call upon a mate in a halfmapping */
+ return SENSE_NULL;
+ } else {
+ return this->sensedir;
+ }
}
-int
-Stage3end_sensedir_nonamb (T this) {
- return this->sensedir_nonamb;
-}
int
Stage3end_cdna_direction (T this) {
@@ -1298,45 +1401,84 @@ Stage3end_nintrons (T this) {
bool
Stage3end_start_ambiguous_p (T this) {
- return this->start_ambiguous_p;
+ Substring_T substring;
+
+ substring = (Substring_T) List_head(this->substrings_1toN);
+ return Substring_ambiguous_p(substring);
}
bool
Stage3end_end_ambiguous_p (T this) {
- return this->end_ambiguous_p;
-}
-
-int
-Stage3end_amb_length_start (T this) {
- return this->start_amb_length;
-}
+ Substring_T substring;
-int
-Stage3end_amb_length_end (T this) {
- return this->end_amb_length;
+ substring = (Substring_T) List_head(this->substrings_Nto1);
+ return Substring_ambiguous_p(substring);
}
Univcoord_T *
Stage3end_start_ambcoords (T this) {
- return this->start_ambcoords;
+ Substring_T substring;
+
+ substring = (Substring_T) List_head(this->substrings_1toN);
+ if (Substring_ambiguous_p(substring) == false) {
+ return (Univcoord_T *) NULL;
+ } else {
+ return Substring_ambcoords(substring);
+ }
}
Univcoord_T *
Stage3end_end_ambcoords (T this) {
- return this->end_ambcoords;
+ Substring_T substring;
+
+ substring = (Substring_T) List_head(this->substrings_Nto1);
+ if (Substring_ambiguous_p(substring) == false) {
+ return (Univcoord_T *) NULL;
+ } else {
+ return Substring_ambcoords(substring);
+ }
}
int
Stage3end_start_nambcoords (T this) {
- return this->start_nambcoords;
+ Substring_T substring;
+
+ substring = (Substring_T) List_head(this->substrings_1toN);
+ if (Substring_ambiguous_p(substring) == false) {
+ return 0;
+ } else {
+ return Substring_nambcoords(substring);
+ }
}
int
Stage3end_end_nambcoords (T this) {
- return this->end_nambcoords;
+ Substring_T substring;
+
+ substring = (Substring_T) List_head(this->substrings_Nto1);
+ if (Substring_ambiguous_p(substring) == false) {
+ return 0;
+ } else {
+ return Substring_nambcoords(substring);
+ }
+}
+
+
+int
+Stage3end_substrings_querystart (T this) {
+ Substring_T substring;
+
+ substring = (Substring_T) List_head(this->substrings_1toN);
+ return Substring_querystart(substring);
}
+int
+Stage3end_substrings_queryend (T this) {
+ Substring_T substring;
+ substring = (Substring_T) List_head(this->substrings_Nto1);
+ return Substring_queryend(substring);
+}
int
Stage3end_gmap_querystart (T this) {
@@ -1348,13 +1490,15 @@ Stage3end_gmap_queryend (T this) {
return this->pairarray[this->npairs - 1].querypos;
}
-
int
Stage3end_terminal_trim (T this) {
+ Substring_T substring;
+
if (this->hittype != TERMINAL) {
return 0;
} else {
- return Substring_trim_left(this->substring1) + Substring_trim_right(this->substring1);
+ substring = (Substring_T) List_head(this->substrings_LtoH);
+ return Substring_trim_left(substring) + Substring_trim_right(substring);
}
}
@@ -1368,35 +1512,16 @@ static Overlap_T
Stage3end_gene_overlap (T this) {
Overlap_T overlap;
bool foundp = false;
-
+ Substring_T substring;
+ List_T p;
+
if (this->hittype == GMAP) {
return Pair_gene_overlap(this->pairarray,this->npairs,genes_iit,
genes_divint_crosstable[this->chrnum],favor_multiexon_p);
} else {
- if ((overlap = Substring_gene_overlap(this->substring1,favor_multiexon_p)) == KNOWN_GENE_MULTIEXON) {
- return KNOWN_GENE_MULTIEXON;
- } else if (overlap == KNOWN_GENE) {
- if (favor_multiexon_p == false) {
- return KNOWN_GENE;
- } else {
- foundp = true;
- }
- }
-
- if (this->substring2 != NULL) {
- if ((overlap = Substring_gene_overlap(this->substring2,favor_multiexon_p)) == KNOWN_GENE_MULTIEXON) {
- return KNOWN_GENE_MULTIEXON;
- } else if (overlap == KNOWN_GENE) {
- if (favor_multiexon_p == false) {
- return KNOWN_GENE;
- } else {
- foundp = true;
- }
- }
- }
-
- if (this->substring0 != NULL) {
- if ((overlap = Substring_gene_overlap(this->substring0,favor_multiexon_p)) == KNOWN_GENE_MULTIEXON) {
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if ((overlap = Substring_gene_overlap(substring,favor_multiexon_p)) == KNOWN_GENE_MULTIEXON) {
return KNOWN_GENE_MULTIEXON;
} else if (overlap == KNOWN_GENE) {
if (favor_multiexon_p == false) {
@@ -1418,6 +1543,9 @@ Stage3end_gene_overlap (T this) {
bool
Stage3end_contains_known_splicesite (T this) {
+ List_T p;
+ Substring_T substring;
+
/* assert(this->hittype != GMAP); */
/* indel + splice => requires gmap
@@ -1428,25 +1556,28 @@ Stage3end_contains_known_splicesite (T this) {
if (this->hittype == GMAP) {
/* Possible now because performing redo of GMAP for sense inconsistency */
return false;
- } else if (this->hittype != INSERTION && this->hittype != DELETION && this->hittype != SHORTEXON) {
- return false;
- } else if (Substring_contains_known_splicesite(this->substring1) == true) {
- return true;
- } else if (this->substring2 != NULL && Substring_contains_known_splicesite(this->substring2) == true) {
- return true;
- } else if (this->substring0 != NULL && Substring_contains_known_splicesite(this->substring0) == true) {
- return true;
} else {
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_contains_known_splicesite(substring) == true) {
+ return true;
+ }
+ }
return false;
}
}
+
bool
Stage3end_indel_contains_known_splicesite (bool *leftp, bool *rightp, T this) {
+ Substring_T substring1, substring2;
+
/* indel + splice => requires gmap */
+ substring1 = (Substring_T) List_head(this->substrings_1toN);
+ substring2 = (Substring_T) List_head(this->substrings_Nto1);
- *leftp = Substring_contains_known_splicesite(this->substring1);
- *rightp = Substring_contains_known_splicesite(this->substring2);
+ *leftp = Substring_contains_known_splicesite(substring1);
+ *rightp = Substring_contains_known_splicesite(substring2);
if (*leftp == true || *rightp == true) {
return true;
} else {
@@ -1455,65 +1586,15 @@ Stage3end_indel_contains_known_splicesite (bool *leftp, bool *rightp, T this) {
}
-#if 0
-bool
-Stage3end_bad_stretch_p (T this, Compress_T query_compress_fwd, Compress_T query_compress_rev) {
- if (this->hittype == GMAP) {
-#if 0
- if (this->gmap_cdna_direction != 0 && this->sensedir == SENSE_NULL) {
- /* Doesn't work for alignments without introns */
- debug0(printf("Bad GMAP: cdna_direction %d and sense null\n",this->gmap_cdna_direction));
- return true;
- }
-#endif
-
- if (this->gmap_nindelbreaks > 3) {
- debug0(printf("Bad GMAP: nindel breaks %d > 3\n",this->gmap_nindelbreaks));
- return true;
-#if 0
- } else if (this->gmap_min_splice_prob < 0.5) {
- /* Calculation is buggy */
- debug0(printf("Bad GMAP: min splice prob %f < 0.5\n",this->gmap_min_splice_prob));
- return true;
- } else {
- return Stage3_bad_stretch_p(this->pairarray,this->npairs,/*pos5*/this->trim_left,
- /*pos3*/this->querylength_adj - this->trim_right);
- ngoodpart = Stage3_good_part(this->pairarray,this->npairs,/*pos5*/this->trim_left,
- /*pos3*/this->querylength_adj - this->trim_right);
- if (ngoodpart < this->querylength_adj/2) {
- return true;
- } else {
- return false;
- }
-#endif
- }
- } else if (Substring_bad_stretch_p(this->substring1,query_compress_fwd,query_compress_rev) == true) {
- return true;
- } else if (this->substring2 != NULL && Substring_bad_stretch_p(this->substring2,query_compress_fwd,query_compress_rev) == true) {
- return true;
- } else if (this->substring0 != NULL && Substring_bad_stretch_p(this->substring0,query_compress_fwd,query_compress_rev) == true) {
- return true;
- } else {
- return false;
- }
-}
-#endif
-
-
-
static long int
Stage3end_compute_tally (T this) {
long int tally = 0L;
+ List_T p;
+ Substring_T substring;
- tally = 0L;
- if (this->substring1 != NULL) {
- tally += Substring_tally(this->substring1,tally_iit,tally_divint_crosstable);
- }
- if (this->substring2 != NULL) {
- tally += Substring_tally(this->substring2,tally_iit,tally_divint_crosstable);
- }
- if (this->substring0 != NULL) {
- tally += Substring_tally(this->substring0,tally_iit,tally_divint_crosstable);
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ tally += Substring_tally(substring,tally_iit,tally_divint_crosstable);
}
return tally;
@@ -1521,50 +1602,25 @@ Stage3end_compute_tally (T this) {
static bool
Stage3end_runlength_p (T this) {
- if (this->substring1 != NULL && Substring_runlength_p(this->substring1,runlength_iit,runlength_divint_crosstable) == true) {
- return true;
- }
- if (this->substring2 != NULL && Substring_runlength_p(this->substring2,runlength_iit,runlength_divint_crosstable) == true) {
- return true;
- }
- if (this->substring0 != NULL && Substring_runlength_p(this->substring0,runlength_iit,runlength_divint_crosstable) == true) {
- return true;
- }
-
- return false;
-}
-
-
-#if 0
-/* Tries to use tally information. Now obsolete */
-static long int
-Stage3end_tally (T this) {
+ List_T p;
+ Substring_T substring;
- if (tally_iit == NULL) {
- return 0L;
- } else if (this->tally >= 0) {
- return this->tally;
- } else {
- this->tally = 0L;
- if (this->substring1 != NULL) {
- this->tally += Substring_tally(this->substring1,tally_iit,tally_divint_crosstable);
- }
- if (this->substring2 != NULL) {
- this->tally += Substring_tally(this->substring2,tally_iit,tally_divint_crosstable);
- }
- if (this->substring0 != NULL) {
- this->tally += Substring_tally(this->substring0,tally_iit,tally_divint_crosstable);
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_runlength_p(substring,runlength_iit,runlength_divint_crosstable) == true) {
+ return true;
}
-
- return this->tally;
}
+
+ return false;
}
-#endif
bool
Stage3end_genomicbound_from_start (Univcoord_T *genomicbound, T this, int overlap, Univcoord_T chroffset) {
int substring_length;
+ List_T p;
+ Substring_T substring;
debug11(printf("Stage3end_genomicbound_from_start with overlap %d\n",overlap));
if (this->hittype == GMAP) {
@@ -1573,39 +1629,14 @@ Stage3end_genomicbound_from_start (Univcoord_T *genomicbound, T this, int overla
return true;
} else {
debug11(printf(" Computing on substrings\n"));
- if (this->substring0 != NULL) {
- debug11(printf(" Substring 0 has length %d\n",Substring_match_length_orig(this->substring0)));
- if ((substring_length = Substring_match_length_orig(this->substring0)) >= overlap) {
- if (this->plusp == true) {
- *genomicbound = Substring_alignstart(this->substring0) + overlap;
- } else {
- *genomicbound = Substring_alignstart(this->substring0) - overlap;
- }
- return true;
- } else {
- overlap -= substring_length;
- }
- }
-
- if ((substring_length = Substring_match_length_orig(this->substring1)) >= overlap) {
- debug11(printf(" Substring 1 has length %d\n",Substring_match_length_orig(this->substring1)));
- if (this->plusp == true) {
- *genomicbound = Substring_alignstart(this->substring1) + overlap;
- } else {
- *genomicbound = Substring_alignstart(this->substring1) - overlap;
- }
- return true;
- } else {
- overlap -= substring_length;
- }
-
- if (this->substring2 != NULL) {
- debug11(printf(" Substring 2 has length %d\n",Substring_match_length_orig(this->substring2)));
- if ((substring_length = Substring_match_length_orig(this->substring2)) >= overlap) {
+ for (p = this->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ debug11(printf(" Substring as length %d\n",Substring_match_length_orig(substring)));
+ if ((substring_length = Substring_match_length_orig(substring)) >= overlap) {
if (this->plusp == true) {
- *genomicbound = Substring_alignstart(this->substring2) + overlap;
+ *genomicbound = Substring_alignstart(substring) + overlap;
} else {
- *genomicbound = Substring_alignstart(this->substring2) - overlap;
+ *genomicbound = Substring_alignstart(substring) - overlap;
}
return true;
} else {
@@ -1621,6 +1652,8 @@ Stage3end_genomicbound_from_start (Univcoord_T *genomicbound, T this, int overla
bool
Stage3end_genomicbound_from_end (Univcoord_T *genomicbound, T this, int overlap, Univcoord_T chroffset) {
int substring_length;
+ List_T p;
+ Substring_T substring;
debug11(printf("Stage3end_genomicbound_from_end with overlap %d\n",overlap));
if (this->hittype == GMAP) {
@@ -1629,39 +1662,14 @@ Stage3end_genomicbound_from_end (Univcoord_T *genomicbound, T this, int overlap,
return true;
} else {
debug11(printf(" Computing on substrings\n"));
- if (this->substring2 != NULL) {
- debug11(printf(" Substring 2 has length %d\n",Substring_match_length_orig(this->substring2)));
- if ((substring_length = Substring_match_length_orig(this->substring2)) >= overlap) {
- if (this->plusp == true) {
- *genomicbound = Substring_alignend(this->substring2) - overlap;
- } else {
- *genomicbound = Substring_alignend(this->substring2) + overlap;
- }
- return true;
- } else {
- overlap -= substring_length;
- }
- }
-
- if ((substring_length = Substring_match_length_orig(this->substring1)) >= overlap) {
- debug11(printf(" Substring 1 has length %d\n",Substring_match_length_orig(this->substring1)));
- if (this->plusp == true) {
- *genomicbound = Substring_alignend(this->substring1) - overlap;
- } else {
- *genomicbound = Substring_alignend(this->substring1) + overlap;
- }
- return true;
- } else {
- overlap -= substring_length;
- }
-
- if (this->substring0 != NULL) {
- debug11(printf(" Substring 0 has length %d\n",Substring_match_length_orig(this->substring0)));
- if ((substring_length = Substring_match_length_orig(this->substring0)) >= overlap) {
+ for (p = this->substrings_Nto1; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ debug11(printf(" Substring has length %d\n",Substring_match_length_orig(substring)));
+ if ((substring_length = Substring_match_length_orig(substring)) >= overlap) {
if (this->plusp == true) {
- *genomicbound = Substring_alignend(this->substring0) - overlap;
+ *genomicbound = Substring_alignend(substring) - overlap;
} else {
- *genomicbound = Substring_alignend(this->substring0) + overlap;
+ *genomicbound = Substring_alignend(substring) + overlap;
}
return true;
} else {
@@ -1677,8 +1685,13 @@ Stage3end_genomicbound_from_end (Univcoord_T *genomicbound, T this, int overlap,
void
Stage3end_free (T *old) {
+ List_T p;
+ Substring_T substring;
+ Junction_T junction;
+
debug0(printf("Freeing Stage3end %p of type %s\n",*old,hittype_string((*old)->hittype)));
+#if 0
FREE_OUT((*old)->ambcoords_donor);
FREE_OUT((*old)->ambcoords_acceptor);
FREE_OUT((*old)->amb_knowni_donor);
@@ -1687,28 +1700,32 @@ Stage3end_free (T *old) {
FREE_OUT((*old)->amb_nmismatches_acceptor);
FREE_OUT((*old)->amb_probs_donor);
FREE_OUT((*old)->amb_probs_acceptor);
+#endif
- if ((*old)->deletion != NULL) {
- FREE_OUT((*old)->deletion);
+ if ((*old)->cigar_tokens != NULL) {
+ Pair_tokens_free(&(*old)->cigar_tokens);
}
if ((*old)->pairarray != NULL) {
FREE_OUT((*old)->pairarray);
}
- if ((*old)->substring1 != NULL) {
- Substring_free(&(*old)->substring1);
- }
- if ((*old)->substring2 != NULL) {
- Substring_free(&(*old)->substring2);
- }
- if ((*old)->substring0 != NULL) {
- Substring_free(&(*old)->substring0);
+ for (p = (*old)->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ Substring_free(&substring);
}
+ List_free(&(*old)->substrings_1toN);
+ List_free(&(*old)->substrings_Nto1);
+ List_free(&(*old)->substrings_LtoH);
- List_free(&(*old)->substring_LtoH);
+ for (p = (*old)->junctions_1toN; p != NULL; p = List_next(p)) {
+ junction = (Junction_T) List_head(p);
+ Junction_free(&junction);
+ }
+ List_free(&(*old)->junctions_1toN);
+ List_free(&(*old)->junctions_Nto1);
+ List_free(&(*old)->junctions_LtoH);
-
FREE_OUT(*old);
return;
}
@@ -1823,10 +1840,10 @@ gmap5_substring3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
if (hit5->plusp == true) {
start = Substring_alignstart_trim(substring) - chroffset;
end = Substring_alignend_trim(substring) - 1U - chroffset; /* inclusive */
- debug13(printf("plus goal: %u up to %u\n",start,end));
+ debug15(printf("plus goal: %u up to %u\n",start,end));
i = 0;
while (i < hit5->npairs) {
- debug13(printf(" pair %d: genomepos %u\n",i,hit5->pairarray[i].genomepos));
+ debug15(printf(" pair %d: genomepos %u\n",i,hit5->pairarray[i].genomepos));
if (hit5->pairarray[i].gapp == true) {
/* Skip intron */
i++;
@@ -1838,7 +1855,7 @@ gmap5_substring3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
} else if (hit5->pairarray[i].genomepos > end) {
i++;
} else {
- debug13(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos));
+ debug15(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos));
return hit5->pairarray[i].genomepos + chroffset;
}
}
@@ -1847,10 +1864,10 @@ gmap5_substring3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
} else {
start = Substring_alignstart_trim(substring) - 1U - chroffset; /* inclusive */
end = Substring_alignend_trim(substring) - chroffset;
- debug13(printf("minus goal: %u up to %u\n",end,start));
+ debug15(printf("minus goal: %u up to %u\n",end,start));
i = hit5->npairs - 1;
while (i >= 0) {
- debug13(printf(" pair %d: genomepos %u\n",i,hit5->pairarray[i].genomepos));
+ debug15(printf(" pair %d: genomepos %u\n",i,hit5->pairarray[i].genomepos));
if (hit5->pairarray[i].gapp == true) {
/* Skip intron */
i--;
@@ -1862,7 +1879,7 @@ gmap5_substring3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
} else if (hit5->pairarray[i].genomepos < end) {
i--;
} else {
- debug13(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos));
+ debug15(printf("Returning common point at %llu\n",(unsigned long long) hit5->pairarray[i].genomepos));
return hit5->pairarray[i].genomepos + chroffset;
}
}
@@ -1880,11 +1897,11 @@ substring5_gmap3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
if (hit5->plusp == true) {
start = Substring_alignstart_trim(substring) - chroffset;
end = Substring_alignend_trim(substring) - 1U - chroffset; /* inclusive */
- debug13(printf("plus goal: %u up to %u\n",start,end));
+ debug15(printf("plus goal: %u up to %u\n",start,end));
j = 0;
while (j < hit3->npairs) {
- debug13(printf(" pair %d: genomepos %u\n",j,hit3->pairarray[j].genomepos));
+ debug15(printf(" pair %d: genomepos %u\n",j,hit3->pairarray[j].genomepos));
if (hit3->pairarray[j].gapp == true) {
/* Skip intron */
j++;
@@ -1896,7 +1913,7 @@ substring5_gmap3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
} else if (hit3->pairarray[j].genomepos > end) {
j++;
} else {
- debug13(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos));
+ debug15(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos));
return hit3->pairarray[j].genomepos + chroffset;
}
}
@@ -1905,10 +1922,10 @@ substring5_gmap3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
} else {
start = Substring_alignstart_trim(substring) - 1U - chroffset; /* inclusive */
end = Substring_alignend_trim(substring) - chroffset;
- debug13(printf("minus goal: %u up to %u\n",end,start));
+ debug15(printf("minus goal: %u up to %u\n",end,start));
j = hit3->npairs - 1;
while (j >= 0) {
- debug13(printf(" pair %d: genomepos %u\n",j,hit3->pairarray[j].genomepos));
+ debug15(printf(" pair %d: genomepos %u\n",j,hit3->pairarray[j].genomepos));
if (hit3->pairarray[j].gapp == true) {
/* Skip intron */
j--;
@@ -1920,7 +1937,7 @@ substring5_gmap3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
} else if (hit3->pairarray[j].genomepos < end) {
j--;
} else {
- debug13(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos));
+ debug15(printf("Returning common point at %llu\n",(unsigned long long) hit3->pairarray[j].genomepos));
return hit3->pairarray[j].genomepos + chroffset;
}
}
@@ -1933,11 +1950,15 @@ substring5_gmap3_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3, Substrin
static bool
find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T common_genomicpos, Univcoord_T chroffset) {
int i;
+ List_T p, q;
+ Substring_T substring;
+ Junction_T junction;
+
- debug13(printf("Finding ilengths for common_genomicpos %u\n",(Chrpos_T) (common_genomicpos - chroffset)));
+ debug15(printf("Finding ilengths for common_genomicpos %u\n",(Chrpos_T) (common_genomicpos - chroffset)));
if (hit->hittype == GMAP) {
- debug13(printf("Type is GMAP\n"));
- debug13(Pair_dump_array(hit->pairarray,hit->npairs,true));
+ debug15(printf("Type is GMAP\n"));
+ debug15(Pair_dump_array(hit->pairarray,hit->npairs,true));
i = 0;
while (i < hit->npairs && hit->pairarray[i].genomepos != common_genomicpos - chroffset) {
i++;
@@ -1951,94 +1972,108 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
*ilength_low = hit->pairarray[hit->npairs - 1].querypos - hit->pairarray[i].querypos + 1;
*ilength_high = hit->pairarray[i].querypos - hit->pairarray[0].querypos + 1;
}
- debug13(printf("GMAP: Have ilength_low %d and ilength_high %d\n",*ilength_low,*ilength_high));
+ debug15(printf("GMAP: Have ilength_low %d and ilength_high %d\n",*ilength_low,*ilength_high));
+ return true;
} else if (hit->plusp == true) {
- debug13(printf("plus. Checking common genomicpos %llu against substring0 %p, substring1 %p, substring2 %p\n",
- common_genomicpos,hit->substring0,hit->substring1,hit->substring2));
+#ifdef DEBUG15
+ printf("plus. Checking common genomicpos %llu against\n",
+ common_genomicpos - hit->chroffset);
+ for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ printf("substring %p: %u..%u\n",
+ substring,Substring_alignstart_trim(substring) - hit->chroffset,
+ Substring_alignend_trim(substring) - 1U - hit->chroffset);
+ }
+ printf("\n");
+#endif
/* Plus: Subtract 1 from alignend */
- if (Substring_overlap_point_trimmed_p(hit->substring0,common_genomicpos)) {
- debug13(printf("substring0: %u..%u\n",
- Substring_alignstart_trim(hit->substring0) - hit->chroffset,
- Substring_alignend_trim(hit->substring0) - 1U - hit->chroffset));
- *ilength_low = (common_genomicpos - Substring_alignstart_trim(hit->substring0) + 1);
- *ilength_high = ((Substring_alignend_trim(hit->substring0) - 1) - common_genomicpos + 1)
- + Substring_genomic_alignment_length(hit->substring1)
- + Substring_genomic_alignment_length(hit->substring2);
-
- } else if (Substring_overlap_point_trimmed_p(hit->substring1,common_genomicpos)) {
- debug13(printf("substring1: %u..%u\n",
- Substring_alignstart_trim(hit->substring1) - hit->chroffset,
- Substring_alignend_trim(hit->substring1) - 1U - hit->chroffset));
- *ilength_low = Substring_genomic_alignment_length(hit->substring0) +
- (common_genomicpos - Substring_alignstart_trim(hit->substring1) + 1);
- *ilength_high = ((Substring_alignend_trim(hit->substring1) - 1) - common_genomicpos + 1)
- + Substring_genomic_alignment_length(hit->substring2);
- if (hit->hittype == INSERTION) {
- *ilength_high += hit->nindels;
- }
-
- } else if (Substring_overlap_point_trimmed_p(hit->substring2,common_genomicpos)) {
- debug13(printf("substring2: %u..%u\n",
- Substring_alignstart_trim(hit->substring2) - hit->chroffset,
- Substring_alignend_trim(hit->substring2) - 1U - hit->chroffset));
- *ilength_low = Substring_genomic_alignment_length(hit->substring0) +
- Substring_genomic_alignment_length(hit->substring1) +
- (common_genomicpos - Substring_alignstart_trim(hit->substring2) + 1);
- *ilength_high = ((Substring_alignend_trim(hit->substring2) - 1) - common_genomicpos + 1);
- if (hit->hittype == INSERTION) {
- *ilength_low += hit->nindels;
- }
+ *ilength_low = 0;
+ for (p = hit->substrings_1toN, q = hit->junctions_1toN; p != NULL; p = List_next(p), q = List_next(q)) {
+ substring = (Substring_T) List_head(p);
+ debug15(printf("substring %p: %u..%u\n",substring,
+ Substring_alignstart_trim(substring) - hit->chroffset,
+ Substring_alignend_trim(substring) - 1U - hit->chroffset));
+ if (Substring_overlap_point_trimmed_p(substring,common_genomicpos) == false) {
+ *ilength_low += Substring_genomic_alignment_length(substring);
+ if (q != NULL) {
+ junction = (Junction_T) List_head(q);
+ if (Junction_type(junction) == INS_JUNCTION) {
+ *ilength_low += Junction_nindels(junction);
+ }
+ }
- } else {
- return false;
+ } else {
+ *ilength_low += (common_genomicpos - Substring_alignstart_trim(substring) + 1);
+ *ilength_high = ((Substring_alignend_trim(substring) - 1) - common_genomicpos + 1);
+ p = List_next(p);
+ while (p != NULL) {
+ substring = (Substring_T) List_head(p);
+ *ilength_high += Substring_genomic_alignment_length(substring);
+ p = List_next(p);
+ }
+ while (q != NULL) {
+ junction = (Junction_T) List_head(q);
+ if (Junction_type(junction) == INS_JUNCTION) {
+ *ilength_high += Junction_nindels(junction);
+ }
+ q = List_next(q);
+ }
+ debug15(printf("Plus: Have ilength_low %d and ilength_high %d\n",*ilength_low,*ilength_high));
+ return true;
+ }
}
- debug13(printf("Plus: Have ilength_low %d and ilength_high %d\n",*ilength_low,*ilength_high));
-
} else {
- debug13(printf("minus. Checking common genomicpos %llu against substring0 %p, substring1 %p, substring2 %p\n",
- common_genomicpos,hit->substring0,hit->substring1,hit->substring2));
+#ifdef DEBUG15
+ printf("minus. Checking common genomicpos %llu against\n",
+ common_genomicpos - hit->chroffset);
+ for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ printf("substring %p: %u..%u\n",
+ substring,Substring_alignstart_trim(substring) - hit->chroffset,
+ Substring_alignend_trim(substring) - 1U - hit->chroffset);
+ }
+ printf("\n");
+#endif
/* Minus: Subtract 1 from alignstart */
- if (Substring_overlap_point_trimmed_p(hit->substring0,common_genomicpos)) {
- debug13(printf("substring0: %u..%u\n",
- Substring_alignstart_trim(hit->substring0) - 1U - hit->chroffset,
- Substring_alignend_trim(hit->substring0) - hit->chroffset));
- *ilength_low = Substring_genomic_alignment_length(hit->substring2) +
- Substring_genomic_alignment_length(hit->substring1) +
- (common_genomicpos - (Substring_alignend_trim(hit->substring0) /*+ 1*/) + 1);
- *ilength_high = ((Substring_alignstart_trim(hit->substring0) - 1) - common_genomicpos + 1);
-
- } else if (Substring_overlap_point_trimmed_p(hit->substring1,common_genomicpos)) {
- debug13(printf("substring1: %u..%u\n",
- Substring_alignstart_trim(hit->substring1) - 1U - hit->chroffset,
- Substring_alignend_trim(hit->substring1) - hit->chroffset));
- *ilength_low = Substring_genomic_alignment_length(hit->substring2) +
- (common_genomicpos - (Substring_alignend_trim(hit->substring1) /*+ 1*/) + 1);
- *ilength_high = ((Substring_alignstart_trim(hit->substring1) - 1) - common_genomicpos + 1)
- + Substring_genomic_alignment_length(hit->substring0);
- if (hit->hittype == INSERTION) {
- *ilength_low += hit->nindels;
- }
-
- } else if (Substring_overlap_point_trimmed_p(hit->substring2,common_genomicpos)) {
- debug13(printf("substring2: %u..%u\n",
- Substring_alignstart_trim(hit->substring2) - 1U - hit->chroffset,
- Substring_alignend_trim(hit->substring2) - hit->chroffset));
- *ilength_low = (common_genomicpos - (Substring_alignend_trim(hit->substring2) /*+ 1*/) + 1);
- *ilength_high = ((Substring_alignstart_trim(hit->substring2) - 1) - common_genomicpos + 1)
- + Substring_genomic_alignment_length(hit->substring1)
- + Substring_genomic_alignment_length(hit->substring0);
- if (hit->hittype == INSERTION) {
- *ilength_high += hit->nindels;
- }
+ *ilength_high = 0;
+ for (p = hit->substrings_1toN, q = hit->junctions_1toN; p != NULL; p = List_next(p), q = List_next(q)) {
+ substring = (Substring_T) List_head(p);
+ debug15(printf("substring: %u..%u\n",
+ Substring_alignstart_trim(substring) - 1U - hit->chroffset,
+ Substring_alignend_trim(substring) - hit->chroffset));
+ if (Substring_overlap_point_trimmed_p(substring,common_genomicpos) == false) {
+ *ilength_high += Substring_genomic_alignment_length(substring);
+ if (q != NULL) {
+ junction = (Junction_T) List_head(q);
+ if (Junction_type(junction) == INS_JUNCTION) {
+ *ilength_high += Junction_nindels(junction);
+ }
+ }
- } else {
- return false;
+ } else {
+ *ilength_high += ((Substring_alignstart_trim(substring) - 1) - common_genomicpos + 1);
+ *ilength_low = (common_genomicpos - (Substring_alignend_trim(substring) /*+ 1*/) + 1);
+ p = List_next(p);
+ while (p != NULL) {
+ substring = (Substring_T) List_head(p);
+ *ilength_low += Substring_genomic_alignment_length(substring);
+ p = List_next(p);
+ }
+ while (q != NULL) {
+ junction = (Junction_T) List_head(q);
+ if (Junction_type(junction) == INS_JUNCTION) {
+ *ilength_low += Junction_nindels(junction);
+ }
+ q = List_next(q);
+ }
+ debug15(printf("Minus: Have ilength_low %d and ilength_high %d\n",*ilength_low,*ilength_high));
+ return true;
+ }
}
- debug13(printf("Minus: Have ilength_low %d and ilength_high %d\n",*ilength_low,*ilength_high));
}
- return true;
+ return false;
}
@@ -2049,9 +2084,11 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
Univcoord_T common_genomicpos;
int i, j;
Univcoord_T start5, end5, start3, end3;
+ List_T p, q;
+ Substring_T substring, substring5, substring3;
if (hit5->hittype == GMAP && hit3->hittype == GMAP) {
- debug13(printf("Computing overlap using dual GMAP\n"));
+ debug15(printf("Computing overlap using dual GMAP\n"));
if (hit5->plusp == true) {
i = j = 0;
while (i < hit5->npairs && j < hit3->npairs) {
@@ -2072,12 +2109,12 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else if (hit5->pairarray[i].genomepos > hit3->pairarray[j].genomepos) {
j++;
} else {
- debug13(printf("GMAP and GMAP show overlap at position %d, querypos %d and %d\n",
+ debug15(printf("GMAP and GMAP show overlap at position %d, querypos %d and %d\n",
hit5->pairarray[i].genomepos,hit5->pairarray[i].querypos,hit3->pairarray[j].querypos));
return hit5->pairarray[i].genomepos + hit5->chroffset;
}
}
- debug13(printf("GMAP and GMAP show no overlap\n"));
+ debug15(printf("GMAP and GMAP show no overlap\n"));
return 0U;
} else {
@@ -2100,48 +2137,44 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else if (hit5->pairarray[i].genomepos < hit3->pairarray[j].genomepos) {
j++;
} else {
- debug13(printf("GMAP and GMAP show overlap at position %d, querypos %d and %d\n",
+ debug15(printf("GMAP and GMAP show overlap at position %d, querypos %d and %d\n",
hit5->pairarray[i].genomepos,hit5->pairarray[i].querypos,hit3->pairarray[j].querypos));
return hit5->pairarray[i].genomepos + hit5->chroffset;
}
}
- debug13(printf("GMAP and GMAP show no overlap\n"));
+ debug15(printf("GMAP and GMAP show no overlap\n"));
return 0U;
}
-
+
} else if (hit5->hittype == GMAP) {
- debug13(printf("Computing common point using 5' GMAP\n"));
- if ((common_genomicpos = gmap5_substring3_common_genomicpos(hit5,hit3,hit3->substring1)) != 0) {
- return common_genomicpos;
- } else if (hit3->substring2 != NULL && (common_genomicpos = gmap5_substring3_common_genomicpos(hit5,hit3,hit3->substring2)) != 0) {
- return common_genomicpos;
- } else if (hit3->substring0 != NULL && (common_genomicpos = gmap5_substring3_common_genomicpos(hit5,hit3,hit3->substring0)) != 0) {
- return common_genomicpos;
- } else {
- return 0U;
+ debug15(printf("Computing common point using 5' GMAP\n"));
+ for (p = hit3->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if ((common_genomicpos = gmap5_substring3_common_genomicpos(hit5,hit3,substring)) != 0) {
+ return common_genomicpos;
+ }
}
+ return 0U;
} else if (hit3->hittype == GMAP) {
- debug13(printf("Computing common point using 3' GMAP\n"));
- if ((common_genomicpos = substring5_gmap3_common_genomicpos(hit5,hit3,hit5->substring1)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring2 != NULL && (common_genomicpos = substring5_gmap3_common_genomicpos(hit5,hit3,hit5->substring2)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring0 != NULL && (common_genomicpos = substring5_gmap3_common_genomicpos(hit5,hit3,hit5->substring0)) != 0) {
- return common_genomicpos;
- } else {
- return 0U;
+ debug15(printf("Computing common point using 3' GMAP\n"));
+ for (p = hit5->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if ((common_genomicpos = substring5_gmap3_common_genomicpos(hit5,hit3,substring)) != 0) {
+ return common_genomicpos;
+ }
}
+ return 0U;
} else if (hit5->plusp == true && hit3->plusp == true) {
/* plus/plus */
- debug13(printf("Computing overlap using substrings plus/plus\n"));
+ debug15(printf("Computing overlap using substrings plus/plus\n"));
- start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_length;
- end5 = (hit5->genomicend - 1) - hit5->trim_right - hit5->end_amb_length;
- start3 = hit3->genomicstart + hit3->trim_left + hit3->start_amb_length;
- end3 = (hit3->genomicend - 1) - hit3->trim_right - hit3->end_amb_length;
- debug13(printf("hit5 endpoints are %u..%u. hit3 endpoints are %u..%u\n",
+ start5 = hit5->genomicstart + hit5->trim_left + start_amb_length(hit5);
+ end5 = (hit5->genomicend - 1) - hit5->trim_right - end_amb_length(hit5);
+ start3 = hit3->genomicstart + hit3->trim_left + start_amb_length(hit3);
+ end3 = (hit3->genomicend - 1) - hit3->trim_right - end_amb_length(hit3);
+ debug15(printf("hit5 endpoints are %u..%u. hit3 endpoints are %u..%u\n",
start5-hit5->chroffset,end5-hit5->chroffset,start3-hit3->chroffset,end3-hit3->chroffset));
if (end3 < start5) {
@@ -2153,35 +2186,32 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else if (start3 < start5) {
if (end3 < end5) {
/* Case 2: Tails overlap. Go from start5 to end3 */
- debug13(printf("plus/plus case 2a: start5 %u\n",start5 - hit5->chroffset));
- if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
- return start5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
- return start5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
- return start5;
+ debug15(printf("plus/plus case 2a: start5 %u\n",start5 - hit5->chroffset));
+ for (p = hit3->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,start5)) {
+ return start5;
+ }
}
/* Case 2: Tails overlap. Go from start5 to end3 */
- debug13(printf("plus/plus case 2b: end3 %u\n",end3 - hit3->chroffset));
- if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return end3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return end3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return end3;
+ debug15(printf("plus/plus case 2b: end3 %u\n",end3 - hit3->chroffset));
+ for (p = hit5->substrings_Nto1; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,end3)) {
+ return end3;
+ }
}
/* Fall through to general algorithm */
} else {
/* Case 3: hit3 subsumes hit5 */
- debug13(printf("plus/plus case 3\n"));
- if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return end5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return end5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return end5;
+ debug15(printf("plus/plus case 3\n"));
+ for (p = hit3->substrings_Nto1; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,end5)) {
+ return end5;
+ }
}
/* Fall through to general algorithm */
}
@@ -2189,89 +2219,61 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
if (end3 < end5) {
/* Case 4: hit5 subsumes hit3 */
- debug13(printf("plus/plus case 4\n"));
- if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return start3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return start3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return start3;
+ debug15(printf("plus/plus case 4\n"));
+ for (p = hit5->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,start3)) {
+ return start3;
+ }
}
/* Fall through to general algorithm */
} else {
/* Case 5: Based on hit3_trimmed_length */
- debug13(printf("plus/plus case 5a\n"));
- if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return start3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return start3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return start3;
+ debug15(printf("plus/plus case 5a\n"));
+ for (p = hit5->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,start3)) {
+ return start3;
+ }
}
/* Case 5: Based on hit5_trimmed_length */
- debug13(printf("plus/plus case 5b\n"));
- if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return end5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return end5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return end5;
+ debug15(printf("plus/plus case 5b\n"));
+ for (p = hit3->substrings_Nto1; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,end5)) {
+ return end5;
+ }
}
/* Fall through to general algorithm */
}
}
/* General algorithm */
- debug13(printf("plus general: hit3->substring1\n"));
- if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring1)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring2 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring1)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring0 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring1)) != 0) {
- return common_genomicpos;
- }
-
- if (hit3->substring2 != NULL) {
- debug13(printf("plus general: hit3->substring2\n"));
- if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring2)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring2 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring2)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring0 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring2)) != 0) {
- return common_genomicpos;
+ debug15(printf("plus/plus general\n"));
+ for (p = hit3->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring3 = (Substring_T) List_head(p);
+ for (q = hit5->substrings_1toN; q != NULL; q = List_next(q)) {
+ substring5 = (Substring_T) List_head(q);
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(substring5,substring3)) != 0) {
+ return common_genomicpos;
+ }
}
}
- if (hit3->substring0 != NULL) {
- debug13(printf("plus general: hit3->substring0\n"));
- if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring0)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring2 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring0)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring0 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring0)) != 0) {
- return common_genomicpos;
- }
- }
-
return 0;
} else if (hit5->plusp == true && hit3->plusp == false) {
/* plus/minus */
- debug13(printf("Computing overlap using substrings plus/minus\n"));
+ debug15(printf("Computing overlap using substrings plus/minus\n"));
return 0;
- start5 = hit5->genomicstart + hit5->trim_left + hit5->start_amb_length;
- end5 = hit5->genomicend - hit5->trim_right - hit5->end_amb_length;
- start3 = hit3->genomicstart - hit3->trim_left - hit3->start_amb_length;
- end3 = hit3->genomicend + hit3->trim_right + hit3->end_amb_length;
+#if 0
+ start5 = hit5->genomicstart + hit5->trim_left + start_amb_length(hit5);
+ end5 = hit5->genomicend - hit5->trim_right - end_amb_length(hit5);
+ start3 = hit3->genomicstart - hit3->trim_left - start_amb_length(hit3);
+ end3 = hit3->genomicend + hit3->trim_right + end_amb_length(hit3);
if (start3 < start5) {
/* Case 1 */
@@ -2282,7 +2284,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else if (end3 < start5) {
if (start3 < end5) {
/* Case 2: Tails overlap. Go from start5 to start3 */
- debug13(printf("plus case 2a: start5 %u\n",start5 - hit5->chroffset));
+ debug15(printf("plus case 2a: start5 %u\n",start5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
@@ -2292,7 +2294,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
/* Case 2: Tails overlap. Go from start5 to start3 */
- debug13(printf("plus case 2b: start3 %u\n",start3 - hit3->chroffset));
+ debug15(printf("plus case 2b: start3 %u\n",start3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
@@ -2304,7 +2306,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
/* Case 3: hit3 subsumes hit5 */
- debug13(printf("plus case 3\n"));
+ debug15(printf("plus case 3\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
@@ -2318,7 +2320,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
if (start3 < end5) {
/* Case 4: hit5 subsumes hit3 */
- debug13(printf("plus case 4\n"));
+ debug15(printf("plus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
@@ -2330,7 +2332,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
/* Case 5: Based on hit3_trimmed_length */
- debug13(printf("plus case 5a\n"));
+ debug15(printf("plus case 5a\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
@@ -2340,7 +2342,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
/* Case 5: Based on hit5_trimmed_length */
- debug13(printf("plus case 5b\n"));
+ debug15(printf("plus case 5b\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
@@ -2353,7 +2355,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
/* General algorithm */
- debug13(printf("plus general: hit3->substring1\n"));
+ debug15(printf("plus general: hit3->substring1\n"));
if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring1)) != 0) {
return common_genomicpos;
} else if (hit5->substring2 != NULL &&
@@ -2365,7 +2367,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
if (hit3->substring2 != NULL) {
- debug13(printf("plus general: hit3->substring2\n"));
+ debug15(printf("plus general: hit3->substring2\n"));
if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring2)) != 0) {
return common_genomicpos;
} else if (hit5->substring2 != NULL &&
@@ -2378,7 +2380,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
if (hit3->substring0 != NULL) {
- debug13(printf("plus general: hit3->substring0\n"));
+ debug15(printf("plus general: hit3->substring0\n"));
if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring0)) != 0) {
return common_genomicpos;
} else if (hit5->substring2 != NULL &&
@@ -2389,18 +2391,20 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
return common_genomicpos;
}
}
-
+
return 0U;
+#endif
} else if (hit5->plusp == false && hit3->plusp == true) {
/* minus/plus */
- debug13(printf("Computing overlap using substrings minus/plus\n"));
+ debug15(printf("Computing overlap using substrings minus/plus\n"));
return 0;
- start5 = hit5->genomicstart - hit5->trim_left - hit5->start_amb_length;
- end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_length;
- start3 = hit3->genomicstart + hit3->trim_left + hit3->start_amb_length;
- end3 = hit3->genomicend - hit3->trim_right - hit3->end_amb_length;
+#if 0
+ start5 = hit5->genomicstart - hit5->trim_left - start_amb_length(hit5);
+ end5 = hit5->genomicend + hit5->trim_right + end_amb_length(hit5);
+ start3 = hit3->genomicstart + hit3->trim_left + start_amb_length(hit3);
+ end3 = hit3->genomicend - hit3->trim_right - end_amb_length(hit3);
if (end3 < end5) {
/* Case 1 */
@@ -2411,7 +2415,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else if (start3 < end5) {
if (end3 < start5) {
/* Case 2: Tails overlap. Go from end5 to end3 */
- debug13(printf("plus case 2a: end5 %u\n",end5 - hit5->chroffset));
+ debug15(printf("plus case 2a: end5 %u\n",end5 - hit5->chroffset));
if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
return end5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
@@ -2421,7 +2425,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
/* Case 2: Tails overlap. Go from end5 to end3 */
- debug13(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
+ debug15(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
return end3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
@@ -2433,7 +2437,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
/* Case 3: hit3 subsumes hit5 */
- debug13(printf("plus case 3\n"));
+ debug15(printf("plus case 3\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
@@ -2447,7 +2451,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
if (end3 < start5) {
/* Case 4: hit5 subsumes hit3 */
- debug13(printf("plus case 4\n"));
+ debug15(printf("plus case 4\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
@@ -2459,7 +2463,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else {
/* Case 5: Based on hit3_trimmed_length */
- debug13(printf("plus case 5a\n"));
+ debug15(printf("plus case 5a\n"));
if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
return start3;
} else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
@@ -2469,7 +2473,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
/* Case 5: Based on hit5_trimmed_length */
- debug13(printf("plus case 5b\n"));
+ debug15(printf("plus case 5b\n"));
if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
return start5;
} else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
@@ -2482,7 +2486,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
/* General algorithm */
- debug13(printf("plus general: hit3->substring1\n"));
+ debug15(printf("plus general: hit3->substring1\n"));
if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring1)) != 0) {
return common_genomicpos;
} else if (hit5->substring2 != NULL &&
@@ -2494,7 +2498,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
if (hit3->substring2 != NULL) {
- debug13(printf("plus general: hit3->substring2\n"));
+ debug15(printf("plus general: hit3->substring2\n"));
if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring2)) != 0) {
return common_genomicpos;
} else if (hit5->substring2 != NULL &&
@@ -2507,7 +2511,7 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
}
if (hit3->substring0 != NULL) {
- debug13(printf("plus general: hit3->substring0\n"));
+ debug15(printf("plus general: hit3->substring0\n"));
if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring0)) != 0) {
return common_genomicpos;
} else if (hit5->substring2 != NULL &&
@@ -2518,18 +2522,19 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
return common_genomicpos;
}
}
-
+
return 0;
+#endif
} else if (hit5->plusp == false && hit3->plusp == false) {
/* minus/minus */
- debug13(printf("Computing overlap using substrings minus/minus\n"));
+ debug15(printf("Computing overlap using substrings minus/minus\n"));
- start5 = (hit5->genomicstart - 1) - hit5->trim_left - hit5->start_amb_length;
- end5 = hit5->genomicend + hit5->trim_right + hit5->end_amb_length;
- start3 = (hit3->genomicstart - 1) - hit3->trim_left - hit3->start_amb_length;
- end3 = hit3->genomicend + hit3->trim_right + hit3->end_amb_length;
- debug13(printf("hit5 endpoints are %u..%u. hit3 endpoints are %u..%u\n",
+ start5 = (hit5->genomicstart - 1) - hit5->trim_left - start_amb_length(hit5);
+ end5 = hit5->genomicend + hit5->trim_right + end_amb_length(hit5);
+ start3 = (hit3->genomicstart - 1) - hit3->trim_left - start_amb_length(hit3);
+ end3 = hit3->genomicend + hit3->trim_right + end_amb_length(hit3);
+ debug15(printf("hit5 endpoints are %u..%u. hit3 endpoints are %u..%u\n",
start5-hit5->chroffset,end5-hit5->chroffset,start3-hit3->chroffset,end3-hit3->chroffset));
if (end3 > start5) {
@@ -2541,117 +2546,83 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
} else if (start3 > start5) {
if (end3 > end5) {
/* Case 2: Tails overlap. Go from start5 to end3 */
- debug13(printf("minus/minus case 2a: start5 %llu (%u)\n",start5,start5 - hit5->chroffset));
- if (Substring_overlap_point_trimmed_p(hit3->substring0,start5)) {
- debug13(printf("Success on hit3->substring0\n"));
- return start5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring1,start5)) {
- debug13(printf("Success on hit3->substring1\n"));
- return start5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring2,start5)) {
- debug13(printf("Success on hit3->substring2\n"));
- return start5;
+ debug15(printf("minus/minus case 2a: start5 %llu (%u)\n",start5,start5 - hit5->chroffset));
+ for (p = hit3->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,start5)) {
+ return start5;
+ }
}
/* Case 2: Tails overlap. Go from start5 to end3 */
- debug13(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
- if (Substring_overlap_point_trimmed_p(hit5->substring2,end3)) {
- return end3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring1,end3)) {
- return end3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring0,end3)) {
- return end3;
+ debug15(printf("plus case 2b: end3 %u\n",end3 - hit3->chroffset));
+ for (p = hit5->substrings_Nto1; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,end3)) {
+ return end3;
+ }
}
/* Fall through to general algorithm */
-
+
} else {
/* Case 3: hit3 subsumes hit5 */
- debug13(printf("minus/minus case 3: end5 %u\n",end5 - hit5->chroffset));
- if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return end5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return end5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return end5;
+ debug15(printf("minus/minus case 3: end5 %u\n",end5 - hit5->chroffset));
+ for (p = hit3->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,end5)) {
+ return end5;
+ }
}
+
/* Fall through to general algorithm */
}
} else {
if (end3 > end5) {
/* Case 4: hit5 subsumes hit3 */
- debug13(printf("minus/minus case 4: start3 %u\n",(Chrpos_T) (start3 - hit3->chroffset)));
- if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return start3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return start3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return start3;
+ debug15(printf("minus/minus case 4: start3 %u\n",(Chrpos_T) (start3 - hit3->chroffset)));
+ for (p = hit5->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,start3)) {
+ return start3;
+ }
}
/* Fall through to general algorithm */
} else {
/* Case 5: Based on hit3_trimmed_length */
- debug13(printf("minus case 5a: start3 %u\n",start3 - hit3->chroffset));
- if (Substring_overlap_point_trimmed_p(hit5->substring0,start3)) {
- return start3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring1,start3)) {
- return start3;
- } else if (Substring_overlap_point_trimmed_p(hit5->substring2,start3)) {
- return start3;
+ debug15(printf("minus case 5a: start3 %u\n",start3 - hit3->chroffset));
+ for (p = hit5->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,start3)) {
+ return start3;
+ }
}
/* Case 5: Based on hit5_trimmed_length */
- debug13(printf("minus case 5b: end5 %u\n",end5 - hit5->chroffset));
- if (Substring_overlap_point_trimmed_p(hit3->substring2,end5)) {
- return end5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring1,end5)) {
- return end5;
- } else if (Substring_overlap_point_trimmed_p(hit3->substring0,end5)) {
- return end5;
+ debug15(printf("minus case 5b: end5 %u\n",end5 - hit5->chroffset));
+ for (p = hit3->substrings_Nto1; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_overlap_point_trimmed_p(substring,end5)) {
+ return end5;
+ }
}
/* Fall through to general algorithm */
}
}
/* General algorithm */
- debug13(printf("minus/minus general: hit3->substring1\n"));
- if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring1)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring2 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring1)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring0 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring1)) != 0) {
- return common_genomicpos;
- }
-
- if (hit3->substring2 != NULL) {
- debug13(printf("minus general: hit3->substring2\n"));
- if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring2)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring2 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring2)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring0 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring2)) != 0) {
- return common_genomicpos;
+ debug15(printf("minus/minus general\n"));
+ for (p = hit3->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring3 = (Substring_T) List_head(p);
+ for (q = hit5->substrings_1toN; q != NULL; q = List_next(q)) {
+ substring5 = (Substring_T) List_head(q);
+ if ((common_genomicpos = Substring_overlap_segment_trimmed(substring5,substring3)) != 0) {
+ return common_genomicpos;
+ }
}
}
- if (hit3->substring0 != NULL) {
- debug13(printf("minus general: hit3->substring0\n"));
- if ((common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring1,hit3->substring0)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring2 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring2,hit3->substring0)) != 0) {
- return common_genomicpos;
- } else if (hit5->substring0 != NULL &&
- (common_genomicpos = Substring_overlap_segment_trimmed(hit5->substring0,hit3->substring0)) != 0) {
- return common_genomicpos;
- }
- }
-
return 0;
} else {
@@ -2662,16 +2633,21 @@ pair_common_genomicpos (Stage3end_T hit5, Stage3end_T hit3) {
static bool
-test_hardclips (Univcoord_T *common_genomicpos, int hardclip_low, Stage3end_T hit_low, int low_querylength,
- int hardclip_high, Stage3end_T hit_high, int high_querylength, Univcoord_T chroffset) {
+test_hardclips (Univcoord_T *common_genomicpos, int hardclip_low, Stage3end_T hit_low,
+ int hardclip_high, Stage3end_T hit_high, Univcoord_T chroffset) {
Substring_T low_substring, high_substring;
struct Pair_T *low_pairarray, *high_pairarray;
int low_npairs, high_npairs;
int low_querypos, high_querypos;
+ int low_querylength, high_querylength;
bool plusp;
- debug13(printf("Entering test_hardclips with hardclip_low %d, hardclip_high %d\n",
+ low_querylength = hit_low->querylength;
+ high_querylength = hit_high->querylength;
+
+ debug15(printf("Entering test_hardclips with hardclip_low %d, hardclip_high %d\n",
hardclip_low,hardclip_high));
+ debug15(printf("querylength_low %d, querylength_high %d\n",low_querylength,high_querylength));
plusp = Stage3end_plusp(hit_low);
@@ -2683,39 +2659,39 @@ test_hardclips (Univcoord_T *common_genomicpos, int hardclip_low, Stage3end_T hi
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
}
- debug13(printf("Dual GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("Dual GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
- debug13(printf("Fails because low_querypos %d is not in low_pairarray\n",low_querypos));
+ debug15(printf("Fails because low_querypos %d is not in low_pairarray\n",low_querypos));
return false;
} else if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false) {
- debug13(printf("Fails because low_querypos %d - 1 is not in low_pairarray\n",low_querypos));
+ debug15(printf("Fails because low_querypos %d - 1 is not in low_pairarray\n",low_querypos));
return false;
} else if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false) {
- debug13(printf("Fails because low_querypos %d + 1 is not in low_pairarray\n",low_querypos));
+ debug15(printf("Fails because low_querypos %d + 1 is not in low_pairarray\n",low_querypos));
return false;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
- debug13(printf("Fails because high_querypos %d is not in high_pairarray\n",low_querypos));
+ debug15(printf("Fails because high_querypos %d is not in high_pairarray\n",low_querypos));
return false;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false) {
- debug13(printf("Fails because high_querypos %d - 1 is not in high_pairarray\n",low_querypos));
+ debug15(printf("Fails because high_querypos %d - 1 is not in high_pairarray\n",low_querypos));
return false;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false) {
- debug13(printf("Fails because high_querypos %d + 1 is not in high_pairarray\n",low_querypos));
+ debug15(printf("Fails because high_querypos %d + 1 is not in high_pairarray\n",low_querypos));
return false;
} else if (Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Pairarray_lookup(high_pairarray,high_npairs,high_querypos)) {
- debug13(printf("Fails because low genomicpos %u != high genomicpos %u\n",
+ debug15(printf("Fails because low genomicpos %u != high genomicpos %u\n",
Pairarray_lookup(low_pairarray,low_npairs,low_querypos),
Pairarray_lookup(high_pairarray,high_npairs,high_querypos)));
return false;
} else {
*common_genomicpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
- debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ debug15(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
return true;
}
@@ -2725,51 +2701,51 @@ test_hardclips (Univcoord_T *common_genomicpos, int hardclip_low, Stage3end_T hi
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
}
- debug13(printf("Low GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("Low GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
- debug13(printf("Fails because low_querypos %d is not in low_pairarray\n",low_querypos));
+ debug15(printf("Fails because low_querypos %d is not in low_pairarray\n",low_querypos));
return false;
} else if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false) {
- debug13(printf("Fails because low_querypos %d - 1 is not in low_pairarray\n",low_querypos));
+ debug15(printf("Fails because low_querypos %d - 1 is not in low_pairarray\n",low_querypos));
return false;
} else if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos+1) == false) {
- debug13(printf("Fails because low_querypos %d + 1 is not in low_pairarray\n",low_querypos));
+ debug15(printf("Fails because low_querypos %d + 1 is not in low_pairarray\n",low_querypos));
return false;
} else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
- debug13(printf("Fails because high_querypos %d gives a NULL substring\n",high_querypos));
+ debug15(printf("Fails because high_querypos %d gives a NULL substring\n",high_querypos));
return false;
} else if (Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring) {
- debug13(printf("Fails because high_querypos %d - 1 gives substring %p\n",
+ debug15(printf("Fails because high_querypos %d - 1 gives substring %p\n",
high_querypos,Stage3end_substring_containing(hit_high,high_querypos-1)));
return false;
} else if (Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring) {
- debug13(printf("Fails because high_querypos %d + 1 gives substring %p\n",
+ debug15(printf("Fails because high_querypos %d + 1 gives substring %p\n",
high_querypos,Stage3end_substring_containing(hit_high,high_querypos+1)));
return false;
} else if (plusp == true) {
- if (Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset) {
- debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
+ if (Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Substring_genomicstart(high_substring) + high_querypos - chroffset) {
+ debug15(printf("Fails because low chrpos %u != high chrpos %u\n",
Pairarray_lookup(low_pairarray,low_npairs,low_querypos),
- Substring_genomicstart_adj(high_substring) + high_querypos - chroffset));
+ Substring_genomicstart(high_substring) + high_querypos - chroffset));
return false;
}
} else {
- if (Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset) {
- debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
+ if (Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset) {
+ debug15(printf("Fails because low chrpos %u != high chrpos %u\n",
Pairarray_lookup(low_pairarray,low_npairs,low_querypos),
- (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset));
+ (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset));
return false;
}
}
*common_genomicpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos) + chroffset;
- debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ debug15(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
return true;
} else if (Stage3end_hittype(hit_high) == GMAP) {
@@ -2778,127 +2754,127 @@ test_hardclips (Univcoord_T *common_genomicpos, int hardclip_low, Stage3end_T hi
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
}
- debug13(printf("High GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("High GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
- debug13(printf("Fails because low_querypos %d gives a NULL substring\n",low_querypos));
+ debug15(printf("Fails because low_querypos %d gives a NULL substring\n",low_querypos));
return false;
} else if (Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring) {
- debug13(printf("Fails because low_querypos %d - 1 gives substring %p\n",
+ debug15(printf("Fails because low_querypos %d - 1 gives substring %p\n",
low_querypos,Stage3end_substring_containing(hit_low,low_querypos-1)));
return false;
} else if (Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring) {
- debug13(printf("Fails because low_querypos %d + 1 gives substring %p\n",
+ debug15(printf("Fails because low_querypos %d + 1 gives substring %p\n",
low_querypos,Stage3end_substring_containing(hit_low,low_querypos+1)));
return false;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
- debug13(printf("Fails because high_querypos %d is not in high_pairarray\n",low_querypos));
+ debug15(printf("Fails because high_querypos %d is not in high_pairarray\n",low_querypos));
return false;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false) {
- debug13(printf("Fails because high_querypos %d - 1 is not in high_pairarray\n",low_querypos));
+ debug15(printf("Fails because high_querypos %d - 1 is not in high_pairarray\n",low_querypos));
return false;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos+1) == false) {
- debug13(printf("Fails because high_querypos %d + 1 is not in high_pairarray\n",low_querypos));
+ debug15(printf("Fails because high_querypos %d + 1 is not in high_pairarray\n",low_querypos));
return false;
} else if (plusp == true) {
- if (Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != Substring_genomicstart_adj(low_substring) + low_querypos - chroffset) {
- debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
- Substring_genomicstart_adj(low_substring) + low_querypos - chroffset,
+ if (Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != Substring_genomicstart(low_substring) + low_querypos - chroffset) {
+ debug15(printf("Fails because low chrpos %u != high chrpos %u\n",
+ Substring_genomicstart(low_substring) + low_querypos - chroffset,
Pairarray_lookup(high_pairarray,high_npairs,high_querypos)));
return false;
}
} else {
- if (Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset) {
- debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
- (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset,
+ if (Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset) {
+ debug15(printf("Fails because low chrpos %u != high chrpos %u\n",
+ (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset,
Pairarray_lookup(high_pairarray,high_npairs,high_querypos)));
return false;
}
}
*common_genomicpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos) + chroffset;
- debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ debug15(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
return true;
} else {
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
- debug13(printf("Both substrings, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
+ debug15(printf("Both substrings, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
- debug13(printf("Fails because low_querypos %d gives a NULL substring\n",low_querypos));
+ debug15(printf("Fails because low_querypos %d gives a NULL substring\n",low_querypos));
return false;
} else if (Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring) {
- debug13(printf("Fails because low_querypos %d - 1 gives substring %p\n",
+ debug15(printf("Fails because low_querypos %d - 1 gives substring %p\n",
low_querypos,Stage3end_substring_containing(hit_low,low_querypos-1)));
return false;
} else if (Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring) {
- debug13(printf("Fails because low_querypos %d + 1 gives substring %p\n",
+ debug15(printf("Fails because low_querypos %d + 1 gives substring %p\n",
low_querypos,Stage3end_substring_containing(hit_low,low_querypos+1)));
return false;
} else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
- debug13(printf("Fails because high_querypos %d gives a NULL substring\n",high_querypos));
+ debug15(printf("Fails because high_querypos %d gives a NULL substring\n",high_querypos));
return false;
} else if (Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring) {
- debug13(printf("Fails because high_querypos %d - 1 gives substring %p\n",
+ debug15(printf("Fails because high_querypos %d - 1 gives substring %p\n",
high_querypos,Stage3end_substring_containing(hit_high,high_querypos-1)));
return false;
} else if (Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring) {
- debug13(printf("Fails because high_querypos %d + 1 gives substring %p\n",
+ debug15(printf("Fails because high_querypos %d + 1 gives substring %p\n",
high_querypos,Stage3end_substring_containing(hit_high,high_querypos+1)));
return false;
- } else if (Substring_genomicstart_adj(low_substring) + low_querypos - chroffset != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset) {
- debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
- Substring_genomicstart_adj(low_substring) + low_querypos - chroffset,
- Substring_genomicstart_adj(high_substring) + high_querypos - chroffset));
+ } else if (Substring_genomicstart(low_substring) + low_querypos - chroffset != Substring_genomicstart(high_substring) + high_querypos - chroffset) {
+ debug15(printf("Fails because low chrpos %u != high chrpos %u\n",
+ Substring_genomicstart(low_substring) + low_querypos - chroffset,
+ Substring_genomicstart(high_substring) + high_querypos - chroffset));
return false;
} else {
- *common_genomicpos = Substring_genomicstart_adj(low_substring) + low_querypos; /* Want univcoord */
- debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ *common_genomicpos = Substring_genomicstart(low_substring) + low_querypos; /* Want univcoord */
+ debug15(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
return true;
}
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
- debug13(printf("Both substrings, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("Both substrings, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
- debug13(printf("Fails because low_querypos %d gives a NULL substring\n",low_querypos));
+ debug15(printf("Fails because low_querypos %d gives a NULL substring\n",low_querypos));
return false;
} else if (Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring) {
- debug13(printf("Fails because low_querypos %d - 1 gives substring %p\n",
+ debug15(printf("Fails because low_querypos %d - 1 gives substring %p\n",
low_querypos,Stage3end_substring_containing(hit_low,low_querypos-1)));
return false;
} else if (Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring) {
- debug13(printf("Fails because low_querypos %d + 1 gives substring %p\n",
+ debug15(printf("Fails because low_querypos %d + 1 gives substring %p\n",
low_querypos,Stage3end_substring_containing(hit_low,low_querypos+1)));
return false;
} else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
- debug13(printf("Fails because high_querypos %d gives a NULL substring\n",high_querypos));
+ debug15(printf("Fails because high_querypos %d gives a NULL substring\n",high_querypos));
return false;
} else if (Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring) {
- debug13(printf("Fails because high_querypos %d - 1 gives substring %p\n",
+ debug15(printf("Fails because high_querypos %d - 1 gives substring %p\n",
high_querypos,Stage3end_substring_containing(hit_high,high_querypos-1)));
return false;
} else if (Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring) {
- debug13(printf("Fails because high_querypos %d + 1 gives substring %p\n",
+ debug15(printf("Fails because high_querypos %d + 1 gives substring %p\n",
high_querypos,Stage3end_substring_containing(hit_high,high_querypos+1)));
return false;
- } else if ((Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset) {
- debug13(printf("Fails because low chrpos %u != high chrpos %u\n",
- (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset,
- (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset));
+ } else if ((Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset != (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset) {
+ debug15(printf("Fails because low chrpos %u != high chrpos %u\n",
+ (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset,
+ (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset));
return false;
} else {
- *common_genomicpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos; /* Want univcoord */
- debug13(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
+ *common_genomicpos = (Substring_genomicstart(low_substring) - 1) - low_querypos; /* Want univcoord */
+ debug15(printf("Succeeds with common point %u\n",*common_genomicpos - chroffset));
return true;
}
}
@@ -2909,16 +2885,21 @@ test_hardclips (Univcoord_T *common_genomicpos, int hardclip_low, Stage3end_T hi
/* Replaces adjust_hardclips in samprint.c */
static Univcoord_T
-adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int low_querylength,
- int hardclip_high, Stage3end_T hit_high, int high_querylength, Univcoord_T chroffset) {
+adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low,
+ int hardclip_high, Stage3end_T hit_high, Univcoord_T chroffset) {
Substring_T low_substring, high_substring;
struct Pair_T *low_pairarray, *high_pairarray;
int low_npairs, high_npairs;
int low_querypos, high_querypos;
+ int low_querylength, high_querylength;
Chrpos_T low_chrpos, high_chrpos;
bool plusp;
- debug13(printf("Entering adjust_hardclips_right with hardclip_low %d, hardclip_high %d\n",
+
+ low_querylength = hit_low->querylength;
+ high_querylength = hit_high->querylength;
+
+ debug15(printf("Entering adjust_hardclips_right with hardclip_low %d, hardclip_high %d\n",
hardclip_low,hardclip_high));
*shift = 1; /* Making an initial move before each while loop */
plusp = Stage3end_plusp(hit_low);
@@ -2931,12 +2912,12 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
- debug13(printf("Dual GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
-
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
+ debug15(printf("Dual GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
low_querypos++;
high_querypos++;
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
@@ -2954,17 +2935,17 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
if (low_chrpos < high_chrpos) {
- debug13(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
low_querypos++;
} else if (high_chrpos < low_chrpos) {
- debug13(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
high_querypos++;
} else {
low_querypos++;
high_querypos++;
}
}
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength) {
@@ -2981,13 +2962,13 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
}
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
- debug13(printf("Dual GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("Dual GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos--;
high_querypos--;
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
@@ -3005,17 +2986,17 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
if (low_chrpos < high_chrpos) {
- debug13(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
low_querypos--;
} else if (high_chrpos < low_chrpos) {
- debug13(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
high_querypos--;
} else {
low_querypos--;
high_querypos--;
}
}
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0) {
@@ -3038,12 +3019,12 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
- debug13(printf("Low GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
+ debug15(printf("Low GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos++;
high_querypos++;
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
@@ -3051,7 +3032,7 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
(high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
- Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset)) {
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Substring_genomicstart(high_substring) + high_querypos - chroffset)) {
(*shift) += 1;
if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
low_querypos++;
@@ -3059,27 +3040,27 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
high_querypos++;
} else {
low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
- high_chrpos = Substring_genomicstart_adj(high_substring) + high_querypos - chroffset;
+ high_chrpos = Substring_genomicstart(high_substring) + high_querypos - chroffset;
if (low_chrpos < high_chrpos) {
- debug13(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
low_querypos++;
} else if (high_chrpos < low_chrpos) {
- debug13(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
high_querypos++;
} else {
low_querypos++;
high_querypos++;
}
}
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength) {
- debug13(printf("Failing because low_querypos %d + 1 >= low_querylength %d\n",low_querypos,low_querylength));
+ debug15(printf("Failing because low_querypos %d + 1 >= low_querylength %d\n",low_querypos,low_querylength));
*shift = 0;
return 0;
} else if (Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
- debug13(printf("Failing because no substring contains high_querypos %d\n",high_querypos));
+ debug15(printf("Failing because no substring contains high_querypos %d\n",high_querypos));
*shift = 0;
return 0;
} else {
@@ -3093,13 +3074,13 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
}
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
- debug13(printf("Low GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("Low GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos--;
high_querypos--;
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
@@ -3107,7 +3088,7 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
(high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
- Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset)) {
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset)) {
(*shift) += 1;
if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
low_querypos--;
@@ -3115,27 +3096,27 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
high_querypos--;
} else {
low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
- high_chrpos = (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset;
+ high_chrpos = (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset;
if (low_chrpos < high_chrpos) {
- debug13(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
low_querypos--;
} else if (high_chrpos < low_chrpos) {
- debug13(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
high_querypos--;
} else {
low_querypos--;
high_querypos--;
}
}
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0) {
- debug13(printf("Failing because low_querypos %d - 1 < 0\n",low_querypos));
+ debug15(printf("Failing because low_querypos %d - 1 < 0\n",low_querypos));
*shift = 0;
return 0;
} else if (Stage3end_substring_containing(hit_high,high_querypos) == NULL) {
- debug13(printf("Failing because no substring contains high_querypos %d\n",high_querypos));
+ debug15(printf("Failing because no substring contains high_querypos %d\n",high_querypos));
*shift = 0;
return 0;
} else {
@@ -3155,12 +3136,12 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
- debug13(printf("High GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
+ debug15(printf("High GMAP. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos++;
high_querypos++;
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((high_querypos + 1) < high_querylength && (low_querypos + 1) < low_querylength &&
(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
@@ -3168,27 +3149,27 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
(low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
- Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != Substring_genomicstart_adj(low_substring) + low_querypos - chroffset)) {
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != Substring_genomicstart(low_substring) + low_querypos - chroffset)) {
(*shift) += 1;
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
low_querypos++;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
high_querypos++;
} else {
- low_chrpos = Substring_genomicstart_adj(low_substring) + low_querypos - chroffset;
+ low_chrpos = Substring_genomicstart(low_substring) + low_querypos - chroffset;
high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
if (low_chrpos < high_chrpos) {
- debug13(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
low_querypos++;
} else if (high_chrpos < low_chrpos) {
- debug13(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
high_querypos++;
} else {
low_querypos++;
high_querypos++;
}
}
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((high_querypos + 1) >= high_querylength || (low_querypos + 1) >= low_querylength ||
@@ -3206,13 +3187,13 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
}
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
- debug13(printf("High GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("High GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos--;
high_querypos--;
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((high_querypos - 1) >= 0 && (low_querypos - 1) >= 0 &&
(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
@@ -3220,27 +3201,27 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
(low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
- Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset)) {
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset)) {
(*shift) += 1;
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
low_querypos--;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
high_querypos--;
} else {
- low_chrpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset;
+ low_chrpos = (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset;
high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
if (low_chrpos < high_chrpos) {
- debug13(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
low_querypos--;
} else if (high_chrpos < low_chrpos) {
- debug13(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
high_querypos--;
} else {
low_querypos--;
high_querypos--;
}
}
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((high_querypos - 1) < 0 || (low_querypos - 1) < 0 ||
@@ -3261,12 +3242,12 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
} else {
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
- debug13(printf("Both substrings, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
+ debug15(printf("Both substrings, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos++;
high_querypos++;
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
@@ -3274,27 +3255,27 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
(high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
- Substring_genomicstart_adj(low_substring) + low_querypos - chroffset != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset)) {
+ Substring_genomicstart(low_substring) + low_querypos - chroffset != Substring_genomicstart(high_substring) + high_querypos - chroffset)) {
(*shift) += 1;
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
low_querypos++;
} else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
high_querypos++;
} else {
- low_chrpos = Substring_genomicstart_adj(low_substring) + low_querypos - chroffset;
- high_chrpos = Substring_genomicstart_adj(high_substring) + high_querypos - chroffset;
+ low_chrpos = Substring_genomicstart(low_substring) + low_querypos - chroffset;
+ high_chrpos = Substring_genomicstart(high_substring) + high_querypos - chroffset;
if (low_chrpos < high_chrpos) {
- debug13(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u < high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
low_querypos++;
} else if (high_chrpos < low_chrpos) {
- debug13(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u < low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
high_querypos++;
} else {
low_querypos++;
high_querypos++;
}
}
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos + 1) >= low_querylength ||
@@ -3304,7 +3285,7 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
*shift = 0;
return 0;
} else {
- debug13(printf("Returning %u + %d\n",Substring_genomicstart_adj(low_substring) - chroffset,
+ debug15(printf("Returning %u + %d\n",Substring_genomicstart(low_substring) - chroffset,
low_querypos));
assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
@@ -3312,17 +3293,17 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
- return Substring_genomicstart_adj(low_substring) + low_querypos; /* Want univcoord */
+ return Substring_genomicstart(low_substring) + low_querypos; /* Want univcoord */
}
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
- debug13(printf("Both substrings, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("Both substrings, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos--;
high_querypos--;
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
@@ -3330,27 +3311,27 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
(high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
- (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset)) {
+ (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset != (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset)) {
(*shift) += 1;
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
low_querypos--;
} else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
high_querypos--;
} else {
- low_chrpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset;
- high_chrpos = (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset;
+ low_chrpos = (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset;
+ high_chrpos = (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset;
if (low_chrpos < high_chrpos) {
- debug13(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u < high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
low_querypos--;
} else if (high_chrpos < low_chrpos) {
- debug13(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u < low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
high_querypos--;
} else {
low_querypos--;
high_querypos--;
}
}
- debug13(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("right shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos - 1) < 0 ||
@@ -3360,7 +3341,7 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
*shift = 0;
return 0;
} else {
- debug13(printf("Returning %u - %d\n",Substring_genomicstart_adj(low_substring) - chroffset,
+ debug15(printf("Returning %u - %d\n",Substring_genomicstart(low_substring) - chroffset,
low_querypos));
assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
@@ -3368,7 +3349,7 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
- return (Substring_genomicstart_adj(low_substring) - 1) - low_querypos; /* Want univcoord */
+ return (Substring_genomicstart(low_substring) - 1) - low_querypos; /* Want univcoord */
}
}
}
@@ -3377,16 +3358,21 @@ adjust_hardclips_right (int *shift, int hardclip_low, Stage3end_T hit_low, int l
/* Replaces adjust_hardclips in samprint.c */
static Univcoord_T
-adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int low_querylength,
- int hardclip_high, Stage3end_T hit_high, int high_querylength, Univcoord_T chroffset) {
+adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low,
+ int hardclip_high, Stage3end_T hit_high, Univcoord_T chroffset) {
Substring_T low_substring, high_substring;
struct Pair_T *low_pairarray, *high_pairarray;
int low_npairs, high_npairs;
int low_querypos, high_querypos;
+ int low_querylength, high_querylength;
Chrpos_T low_chrpos, high_chrpos;
bool plusp;
- debug13(printf("Entering adjust_hardclips_left with hardclip_low %d, hardclip_high %d\n",
+
+ low_querylength = hit_low->querylength;
+ high_querylength = hit_high->querylength;
+
+ debug15(printf("Entering adjust_hardclips_left with hardclip_low %d, hardclip_high %d\n",
hardclip_low,hardclip_high));
*shift = 1; /* Making an initial move before each while loop */
plusp = Stage3end_plusp(hit_low);
@@ -3399,12 +3385,12 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
- debug13(printf("Dual GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
-
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
+ debug15(printf("Dual GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+
low_querypos--;
high_querypos--;
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
@@ -3422,17 +3408,17 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
if (low_chrpos > high_chrpos) {
- debug13(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
low_querypos--;
} else if (high_chrpos > low_chrpos) {
- debug13(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
high_querypos--;
} else {
low_querypos--;
high_querypos--;
}
}
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0) {
@@ -3449,13 +3435,13 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
}
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
- debug13(printf("Dual GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("Dual GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos++;
high_querypos++;
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
@@ -3473,17 +3459,17 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
if (low_chrpos > high_chrpos) {
- debug13(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
low_querypos++;
} else if (high_chrpos > low_chrpos) {
- debug13(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
high_querypos++;
} else {
low_querypos++;
high_querypos++;
}
}
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength) {
@@ -3506,12 +3492,12 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
- debug13(printf("Low GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
+ debug15(printf("Low GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos--;
high_querypos--;
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
@@ -3519,7 +3505,7 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
(high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
- Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset)) {
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != Substring_genomicstart(high_substring) + high_querypos - chroffset)) {
(*shift) += 1;
if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
low_querypos--;
@@ -3527,19 +3513,19 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
high_querypos--;
} else {
low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
- high_chrpos = Substring_genomicstart_adj(high_substring) + high_querypos - chroffset;
+ high_chrpos = Substring_genomicstart(high_substring) + high_querypos - chroffset;
if (low_chrpos > high_chrpos) {
- debug13(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
low_querypos--;
} else if (high_chrpos > low_chrpos) {
- debug13(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
high_querypos--;
} else {
low_querypos--;
high_querypos--;
}
}
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0 ||
@@ -3557,13 +3543,13 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
}
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
- debug13(printf("Low GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("Low GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos++;
high_querypos++;
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
(Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false ||
Pairarray_contains_p(low_pairarray,low_npairs,low_querypos-1) == false ||
@@ -3571,7 +3557,7 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
(high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
- Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset)) {
+ Pairarray_lookup(low_pairarray,low_npairs,low_querypos) != (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset)) {
(*shift) += 1;
if (Pairarray_contains_p(low_pairarray,low_npairs,low_querypos) == false) {
low_querypos++;
@@ -3579,19 +3565,19 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
high_querypos++;
} else {
low_chrpos = Pairarray_lookup(low_pairarray,low_npairs,low_querypos);
- high_chrpos = (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset;
+ high_chrpos = (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset;
if (low_chrpos > high_chrpos) {
- debug13(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
low_querypos++;
} else if (high_chrpos > low_chrpos) {
- debug13(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
high_querypos++;
} else {
low_querypos++;
high_querypos++;
}
}
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength ||
@@ -3615,12 +3601,12 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
- debug13(printf("High GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
+ debug15(printf("High GMAP, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos--;
high_querypos--;
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((high_querypos - 1) >= 0 && (low_querypos - 1) >= 0 &&
(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
@@ -3628,27 +3614,27 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
(low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
- Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != Substring_genomicstart_adj(low_substring) + low_querypos - chroffset)) {
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != Substring_genomicstart(low_substring) + low_querypos - chroffset)) {
(*shift) += 1;
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
low_querypos--;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
high_querypos--;
} else {
- low_chrpos = Substring_genomicstart_adj(low_substring) + low_querypos - chroffset;
+ low_chrpos = Substring_genomicstart(low_substring) + low_querypos - chroffset;
high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
if (low_chrpos > high_chrpos) {
- debug13(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
low_querypos--;
} else if (high_chrpos > low_chrpos) {
- debug13(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
high_querypos--;
} else {
low_querypos--;
high_querypos--;
}
}
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((high_querypos - 1) < 0 || (low_querypos - 1) < 0 ||
@@ -3666,13 +3652,13 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
}
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
- debug13(printf("High GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("High GMAP, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos++;
high_querypos++;
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((high_querypos + 1) < high_querylength && (low_querypos + 1) < low_querylength &&
(Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false ||
Pairarray_contains_p(high_pairarray,high_npairs,high_querypos-1) == false ||
@@ -3680,27 +3666,27 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
(low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
Stage3end_substring_containing(hit_low,low_querypos+1) != low_substring ||
- Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset)) {
+ Pairarray_lookup(high_pairarray,high_npairs,high_querypos) != (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset)) {
(*shift) += 1;
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
low_querypos++;
} else if (Pairarray_contains_p(high_pairarray,high_npairs,high_querypos) == false) {
high_querypos++;
} else {
- low_chrpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset;
+ low_chrpos = (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset;
high_chrpos = Pairarray_lookup(high_pairarray,high_npairs,high_querypos);
if (low_chrpos > high_chrpos) {
- debug13(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
low_querypos++;
} else if (high_chrpos > low_chrpos) {
- debug13(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
high_querypos++;
} else {
low_querypos++;
high_querypos++;
}
}
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((high_querypos + 1) >= high_querylength || (low_querypos + 1) >= low_querylength ||
@@ -3721,12 +3707,12 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
} else {
if (plusp == true) {
low_querypos = hardclip_low;
- high_querypos = high_querylength - 1 - hardclip_high;
- debug13(printf("Both substrings, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ high_querypos = high_querylength /*- 1*/ - hardclip_high;
+ debug15(printf("Both substrings, plus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos--;
high_querypos--;
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos - 1) >= 0 && (high_querypos - 1) >= 0 &&
((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
@@ -3734,27 +3720,27 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
(high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
- Substring_genomicstart_adj(low_substring) + low_querypos - chroffset != Substring_genomicstart_adj(high_substring) + high_querypos - chroffset)) {
+ Substring_genomicstart(low_substring) + low_querypos - chroffset != Substring_genomicstart(high_substring) + high_querypos - chroffset)) {
(*shift) += 1;
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
low_querypos--;
} else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
high_querypos--;
} else {
- low_chrpos = Substring_genomicstart_adj(low_substring) + low_querypos - chroffset;
- high_chrpos = Substring_genomicstart_adj(high_substring) + high_querypos - chroffset;
+ low_chrpos = Substring_genomicstart(low_substring) + low_querypos - chroffset;
+ high_chrpos = Substring_genomicstart(high_substring) + high_querypos - chroffset;
if (low_chrpos > high_chrpos) {
- debug13(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u > high_chrpos %u, so decreasing low_querypos\n",low_chrpos,high_chrpos));
low_querypos--;
} else if (high_chrpos > low_chrpos) {
- debug13(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u > low_chrpos %u, so decreasing high_querypos\n",high_chrpos,low_chrpos));
high_querypos--;
} else {
low_querypos--;
high_querypos--;
}
}
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos - 1) < 0 || (high_querypos - 1) < 0 ||
@@ -3763,7 +3749,7 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
*shift = 0;
return 0;
} else {
- debug13(printf("Returning %u + %d\n",Substring_genomicstart_adj(low_substring) - chroffset,
+ debug15(printf("Returning %u + %d\n",Substring_genomicstart(low_substring) - chroffset,
low_querypos));
assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
@@ -3771,17 +3757,17 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
- return Substring_genomicstart_adj(low_substring) + low_querypos; /* Want univcoord */
+ return Substring_genomicstart(low_substring) + low_querypos; /* Want univcoord */
}
} else {
- low_querypos = low_querylength - 1 - hardclip_low;
+ low_querypos = low_querylength /*- 1*/ - hardclip_low;
high_querypos = hardclip_high;
- debug13(printf("Both substrings, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
+ debug15(printf("Both substrings, minus. low_querypos %d, high_querypos %d\n",low_querypos,high_querypos));
low_querypos++;
high_querypos++;
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
while ((low_querypos + 1) < low_querylength && (high_querypos + 1) < high_querylength &&
((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL ||
Stage3end_substring_containing(hit_low,low_querypos-1) != low_substring ||
@@ -3789,27 +3775,27 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
(high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL ||
Stage3end_substring_containing(hit_high,high_querypos-1) != high_substring ||
Stage3end_substring_containing(hit_high,high_querypos+1) != high_substring ||
- (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset != (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset)) {
+ (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset != (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset)) {
(*shift) += 1;
if ((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) == NULL) {
low_querypos++;
} else if ((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) == NULL) {
high_querypos++;
} else {
- low_chrpos = (Substring_genomicstart_adj(low_substring) - 1) - low_querypos - chroffset;
- high_chrpos = (Substring_genomicstart_adj(high_substring) - 1) - high_querypos - chroffset;
+ low_chrpos = (Substring_genomicstart(low_substring) - 1) - low_querypos - chroffset;
+ high_chrpos = (Substring_genomicstart(high_substring) - 1) - high_querypos - chroffset;
if (low_chrpos > high_chrpos) {
- debug13(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
+ debug15(printf("low_chrpos %u > high_chrpos %u, so advancing low_querypos\n",low_chrpos,high_chrpos));
low_querypos++;
} else if (high_chrpos > low_chrpos) {
- debug13(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
+ debug15(printf("high_chrpos %u > low_chrpos %u, so advancing high_querypos\n",high_chrpos,low_chrpos));
high_querypos++;
} else {
low_querypos++;
high_querypos++;
}
}
- debug13(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
+ debug15(printf("left shift %d: Advancing to low_querypos %d and high_querypos %d\n",*shift,low_querypos,high_querypos));
}
if ((low_querypos + 1) >= low_querylength || (high_querypos + 1) >= high_querylength ||
@@ -3818,7 +3804,7 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
*shift = 0;
return 0;
} else {
- debug13(printf("Returning %u - %d\n",Substring_genomicstart_adj(low_substring) - chroffset,
+ debug15(printf("Returning %u - %d\n",Substring_genomicstart(low_substring) - chroffset,
low_querypos));
assert((low_substring = Stage3end_substring_containing(hit_low,low_querypos)) != NULL);
assert((high_substring = Stage3end_substring_containing(hit_high,high_querypos)) != NULL);
@@ -3826,7 +3812,7 @@ adjust_hardclips_left (int *shift, int hardclip_low, Stage3end_T hit_low, int lo
assert(Stage3end_substring_containing(hit_low,low_querypos+1) == low_substring);
assert(Stage3end_substring_containing(hit_high,high_querypos-1) == high_substring);
assert(Stage3end_substring_containing(hit_high,high_querypos+1) == high_substring);
- return (Substring_genomicstart_adj(low_substring) - 1) - low_querypos; /* Want univcoord */
+ return (Substring_genomicstart(low_substring) - 1) - low_querypos; /* Want univcoord */
}
}
}
@@ -3853,99 +3839,95 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
hit5 = this->hit5;
hit3 = this->hit3;
- debug13(printf("Entered Stage3pair_overlap with hittype %s and %s\n",
+ debug15(printf("Entered Stage3pair_overlap with hittype %s and %s\n",
hittype_string(hit5->hittype),hittype_string(hit3->hittype)));
if (hit5->hittype == SAMECHR_SPLICE || hit5->hittype == TRANSLOC_SPLICE) {
return 0;
} else if (hit3->hittype == SAMECHR_SPLICE || hit3->hittype == TRANSLOC_SPLICE) {
return 0;
} else if (hit5->plusp != hit3->plusp) {
- debug13(printf("The two ends are not on the same strand, so returning 0\n"));
+ debug15(printf("The two ends are not on the same strand, so returning 0\n"));
return 0;
} else {
- debug13(printf("hit5 trim_left %d + amb_start %d, trim_right %d + amb_end %d, hit3 trim_left %d + amb_start %d, trim_right %d + amb_end %d\n",
- hit5->trim_left,hit5->start_amb_length,hit5->trim_right,hit5->end_amb_length,
- hit3->trim_left,hit3->start_amb_length,hit3->trim_right,hit3->end_amb_length));
+ debug15(printf("hit5 trim_left %d + amb_start %d, trim_right %d + amb_end %d, hit3 trim_left %d + amb_start %d, trim_right %d + amb_end %d\n",
+ hit5->trim_left,start_amb_length(hit5),hit5->trim_right,end_amb_length(hit5),
+ hit3->trim_left,start_amb_length(hit3),hit3->trim_right,end_amb_length(hit3)));
if (hit5->plusp == true) {
/* plus */
#if 0
- hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->start_amb_length - hit5->end_amb_length;
- hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->start_amb_length - hit3->end_amb_length;
+ hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - start_amb_length(hit5) - end_amb_length(hit5);
+ hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - start_amb_length(hit3) - end_amb_length(hit3);
totallength = hit5_trimmed_length + hit3_trimmed_length;
- debug13(printf("totallength = %d, hit5 trimmed length = %d, hit3 trimmed length = %d\n",
+ debug15(printf("totallength = %d, hit5 trimmed length = %d, hit3 trimmed length = %d\n",
totallength,hit5_trimmed_length,hit3_trimmed_length));
- debug13(printf("original insertlength: %d, trim+amb5: %d..%d, trim+amb3: %d..%d\n",
- this->insertlength,hit5->trim_left + hit5->start_amb_length,
- hit5->trim_right + hit5->end_amb_length,hit3->trim_left + hit3->start_amb_length,
- hit3->trim_right + hit3->end_amb_length));
+ debug15(printf("original insertlength: %d, trim+amb5: %d..%d, trim+amb3: %d..%d\n",
+ this->insertlength,hit5->trim_left + start_amb_length(hit5),
+ hit5->trim_right + end_amb_length(hit5),hit3->trim_left + start_amb_length(hit3),
+ hit3->trim_right + end_amb_length(hit3)));
#endif
if ((common_genomicpos = pair_common_genomicpos(hit5,hit3)) == 0) {
- debug13(printf("Cannot determine a common point, so returning 0\n"));
+ debug15(printf("Cannot determine a common point, so returning 0\n"));
return 0;
} else if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
- debug13(printf("Cannot determine ilengths, so returning 0\n"));
+ debug15(printf("Cannot determine ilengths, so returning 0\n"));
return 0;
} else {
- debug13(printf("Inclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
- debug13(printf("ilength53 is %d, ilength 35 is %d\n",ilength5_low + ilength3_high - 1,ilength3_low + ilength5_high - 1));
-
+ debug15(printf("Inclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("ilength53 is %d, ilength 35 is %d\n",ilength5_low + ilength3_high - 1,ilength3_low + ilength5_high - 1));
+
common_left = (ilength5_low < ilength3_low) ? ilength5_low : ilength3_low;
common_right = (ilength5_high < ilength3_high) ? ilength5_high : ilength3_high;
if (common_right > common_left) {
common_shift = common_right/2 - (common_left - 1)/2;
- debug13(printf("Common shift is %d = common_right %d/2 - (common_left %d - 1)/2\n",
+ debug15(printf("Common shift is %d = common_right %d/2 - (common_left %d - 1)/2\n",
common_shift,common_right,common_left));
ilength5_low -= 1;
ilength3_low -= 1;
} else {
common_shift = (common_right - 1)/2 - common_left/2;
- debug13(printf("Common shift is %d = (common_right %d - 1)/2 - common_left %d/2\n",
+ debug15(printf("Common shift is %d = (common_right %d - 1)/2 - common_left %d/2\n",
common_shift,common_right,common_left));
ilength5_high -= 1;
ilength3_high -= 1;
}
- debug13(printf("Exclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Exclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
if ((ilength53 = ilength5_low + ilength3_high) >= (ilength35 = ilength3_low + ilength5_high)) {
/* Use >=, not >, so we favor clipping heads over clipping tails in case of a tie */
- debug13(printf("plus, ilength53 is longer. Clipping heads.\n"));
- debug13(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
+ debug15(printf("plus, ilength53 is longer. Clipping heads.\n"));
+ debug15(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
common_left+common_right-1,common_left,common_right));
clipdir = +1;
/* Want to clip 5 high and 3 low */
*hardclip5_high = ilength5_high - common_shift;
*hardclip3_low = ilength3_low + common_shift;
- debug13(printf("Overlap clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug15(printf("Overlap clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- *hardclip5_high += hit5->trim_right + hit5->end_amb_length;
- *hardclip3_low += hit3->trim_left + hit3->start_amb_length;
- debug13(printf("Ambig clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_high += hit5->trim_right + end_amb_length(hit5);
+ *hardclip3_low += hit3->trim_left + start_amb_length(hit3);
+ debug15(printf("Ambig clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
if (common_shift != 0) {
- if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == true) {
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == true) {
/* No adjustment needed, but need to recompute ilengths for shifted common_genomicpos */
} else {
- common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset);
- common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset);
- debug13(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
+ common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset);
+ common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset);
+ debug15(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
if (shift_right == 0 && shift_left == 0) {
/* Try original position without a shift */
*hardclip5_high = ilength5_high /*- common_shift*/;
*hardclip3_low = ilength3_low /*+ common_shift*/;
- *hardclip5_high += hit5->trim_right + hit5->end_amb_length;
- *hardclip3_low += hit3->trim_left + hit3->start_amb_length;
- if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == false) {
+ *hardclip5_high += hit5->trim_right + end_amb_length(hit5);
+ *hardclip3_low += hit3->trim_left + start_amb_length(hit3);
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
}
@@ -3960,27 +3942,27 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
}
}
- debug13(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
+ debug15(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
/* Recompute hardclips */
if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
} else if (ilength3_low > ilength5_high) {
- debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
ilength3_low -= 1;
} else {
- debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
ilength5_high -= 1;
}
- debug13(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
*hardclip5_high = ilength5_high /*- common_shift*/;
*hardclip3_low = ilength3_low /*+ common_shift*/;
- *hardclip5_high += hit5->trim_right + hit5->end_amb_length;
- *hardclip3_low += hit3->trim_left + hit3->start_amb_length;
+ *hardclip5_high += hit5->trim_right + end_amb_length(hit5);
+ *hardclip3_low += hit3->trim_left + start_amb_length(hit3);
- debug13(printf("Recomputed clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug15(printf("Recomputed clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
@@ -3996,44 +3978,40 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
if (*hardclip3_low < 0) {
*hardclip3_low = 0;
}
- debug13(printf("Positive clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug15(printf("Positive clip for ilength53 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
#endif
} else {
- debug13(printf("plus, ilength35 is longer. Clipping tails.\n"));
- debug13(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
+ debug15(printf("plus, ilength35 is longer. Clipping tails.\n"));
+ debug15(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
common_left+common_right-1,common_left,common_right));
clipdir = -1;
/* Want to clip 5 low and 3 high */
*hardclip5_low = ilength5_low + common_shift;
*hardclip3_high = ilength3_high - common_shift;
- debug13(printf("Overlap clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug15(printf("Overlap clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- *hardclip5_low += hit5->trim_left + hit5->start_amb_length;
- *hardclip3_high += hit3->trim_right + hit3->end_amb_length;
- debug13(printf("Ambig clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low += hit5->trim_left + start_amb_length(hit5);
+ *hardclip3_high += hit3->trim_right + end_amb_length(hit3);
+ debug15(printf("Ambig clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
if (common_shift != 0) {
- if (test_hardclips(&common_genomicpos,*hardclip5_low,hit5,hit5->querylength_adj,
- *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset) == true) {
+ if (test_hardclips(&common_genomicpos,*hardclip5_low,hit5,*hardclip3_high,hit3,hit3->chroffset) == true) {
/* No adjustment needed, but need to recompute ilengths for shifted common_genomicpos */
} else {
- common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip5_low,hit5,hit5->querylength_adj,
- *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset);
- common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip5_low,hit5,hit5->querylength_adj,
- *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset);
- debug13(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
+ common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip5_low,hit5,*hardclip3_high,hit3,hit3->chroffset);
+ common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip5_low,hit5,*hardclip3_high,hit3,hit3->chroffset);
+ debug15(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
if (shift_right == 0 && shift_left == 0) {
/* Try original position without a shift */
*hardclip5_low = ilength5_low /*+ common_shift*/;
*hardclip3_high = ilength3_high /*- common_shift*/;
- *hardclip5_low += hit5->trim_left + hit5->start_amb_length;
- *hardclip3_high += hit3->trim_right + hit3->end_amb_length;
- if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == false) {
+ *hardclip5_low += hit5->trim_left + start_amb_length(hit5);
+ *hardclip3_high += hit3->trim_right + end_amb_length(hit3);
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
}
@@ -4048,26 +4026,26 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
}
}
- debug13(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
+ debug15(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
/* Recompute hardclips */
if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
} else if (ilength5_low > ilength3_high) {
- debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
ilength5_low -= 1;
} else {
- debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
ilength3_high -= 1;
}
- debug13(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
*hardclip5_low = ilength5_low /*+ common_shift*/;
*hardclip3_high = ilength3_high /*- common_shift*/;
- *hardclip5_low += hit5->trim_left + hit5->start_amb_length;
- *hardclip3_high += hit3->trim_right + hit3->end_amb_length;
- debug13(printf("Recomputed clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low += hit5->trim_left + start_amb_length(hit5);
+ *hardclip3_high += hit3->trim_right + end_amb_length(hit3);
+ debug15(printf("Recomputed clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
@@ -4083,96 +4061,92 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
if (*hardclip3_high < 0) {
*hardclip3_high = 0;
}
- debug13(printf("Positive clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug15(printf("Positive clip for ilength35 plus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
#endif
}
- debug13(printf("returning clipdir %d\n",clipdir));
+ debug15(printf("returning clipdir %d\n",clipdir));
return clipdir;
}
} else {
/* minus */
#if 0
- hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - hit5->start_amb_length - hit5->end_amb_length;
- hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - hit3->start_amb_length - hit3->end_amb_length;
+ hit5_trimmed_length = hit5->querylength - hit5->trim_left - hit5->trim_right - start_amb_length(hit5) - end_amb_length(hit5);
+ hit3_trimmed_length = hit3->querylength - hit3->trim_left - hit3->trim_right - start_amb_length(hit3) - end_amb_length(hit3);
totallength = hit5_trimmed_length + hit3_trimmed_length;
- debug13(printf("totallength = %d, hit5 trimmed length = %d, hit3 trimmed length = %d\n",
+ debug15(printf("totallength = %d, hit5 trimmed length = %d, hit3 trimmed length = %d\n",
totallength,hit5_trimmed_length,hit3_trimmed_length));
- debug13(printf("original insertlength: %d, trim+amb5: %d..%d, trim+amb3: %d..%d\n",
- this->insertlength,hit5->trim_left + hit5->start_amb_length,
- hit5->trim_right + hit5->end_amb_length,hit3->trim_left + hit3->start_amb_length,
+ debug15(printf("original insertlength: %d, trim+amb5: %d..%d, trim+amb3: %d..%d\n",
+ this->insertlength,hit5->trim_left + start_amb_length(hit5),
+ hit5->trim_right + hit5->end_amb_length,hit3->trim_left + start_amb_length(hit3),
hit3->trim_right + hit3->end_amb_length));
#endif
if ((common_genomicpos = pair_common_genomicpos(hit5,hit3)) == 0) {
- debug13(printf("Cannot determine a common point, so returning 0\n"));
+ debug15(printf("Cannot determine a common point, so returning 0\n"));
return 0;
} else if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
- debug13(printf("Cannot determine ilengths, so returning 0\n"));
+ debug15(printf("Cannot determine ilengths, so returning 0\n"));
return 0;
} else {
- debug13(printf("Inclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
- debug13(printf("ilength53lh is %d, ilength35lh is %d\n",ilength5_low + ilength3_high - 1,ilength3_low + ilength5_high - 1));
+ debug15(printf("Inclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("ilength53lh is %d, ilength35lh is %d\n",ilength5_low + ilength3_high - 1,ilength3_low + ilength5_high - 1));
common_left = (ilength5_low < ilength3_low) ? ilength5_low : ilength3_low;
common_right = (ilength5_high < ilength3_high) ? ilength5_high : ilength3_high;
if (common_right > common_left) {
common_shift = common_right/2 - (common_left - 1)/2;
- debug13(printf("Common shift is %d = common_right %d/2 - (common_left %d - 1)/2\n",
+ debug15(printf("Common shift is %d = common_right %d/2 - (common_left %d - 1)/2\n",
common_shift,common_right,common_left));
ilength5_low -= 1;
ilength3_low -= 1;
} else {
common_shift = (common_right - 1)/2 - common_left/2;
- debug13(printf("Common shift is %d = (common_right %d - 1)/2 - common_left %d/2\n",
+ debug15(printf("Common shift is %d = (common_right %d - 1)/2 - common_left %d/2\n",
common_shift,common_right,common_left));
ilength5_high -= 1;
ilength3_high -= 1;
}
- debug13(printf("Exclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Exclusive: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
if ((ilength53 = ilength5_low + ilength3_high) > (ilength35 = ilength3_low + ilength5_high)) {
/* Use >, not >=, so we favor clipping heads over clipping tails in case of a tie */
- debug13(printf("minus, ilength53 is longer. Clipping tails.\n"));
- overlap = common_left + common_right - 1;
- debug13(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
+ debug15(printf("minus, ilength53 is longer. Clipping tails.\n"));
+ debug15(overlap = common_left + common_right - 1);
+ debug15(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
overlap,common_left,common_right));
clipdir = +1;
-
+
/* Want to clip 5 high and 3 low */
*hardclip5_high = ilength5_high - common_shift;
*hardclip3_low = ilength3_low + common_shift;
- debug13(printf("Overlap clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug15(printf("Overlap clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- *hardclip5_high += hit5->trim_left + hit5->start_amb_length;
- *hardclip3_low += hit3->trim_right + hit3->end_amb_length;
- debug13(printf("Ambig clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_high += hit5->trim_left + start_amb_length(hit5);
+ *hardclip3_low += hit3->trim_right + end_amb_length(hit3);
+ debug15(printf("Ambig clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
if (common_shift != 0) {
- if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == true) {
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == true) {
/* No adjustment needed, but need to recompute ilengths for shifted common_genomicpos */
} else {
- common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset);
- common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset);
- debug13(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
+ common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset);
+ common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset);
+ debug15(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
if (shift_right == 0 && shift_left == 0) {
/* Try original position without a shift */
*hardclip5_high = ilength5_high /*- common_shift*/;
*hardclip3_low = ilength3_low /*+ common_shift*/;
- *hardclip5_high += hit5->trim_left + hit5->start_amb_length;
- *hardclip3_low += hit3->trim_right + hit3->end_amb_length;
- if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == false) {
+ *hardclip5_high += hit5->trim_left + start_amb_length(hit5);
+ *hardclip3_low += hit3->trim_right + end_amb_length(hit3);
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
}
@@ -4187,26 +4161,26 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
}
}
- debug13(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
+ debug15(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
/* Recompute hardclips */
if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
} else if (ilength3_low > ilength5_high) {
- debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
ilength3_low -= 1;
} else {
- debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
ilength5_high -= 1;
}
- debug13(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
*hardclip5_high = ilength5_high /*- common_shift*/;
*hardclip3_low = ilength3_low /*+ common_shift*/;
- *hardclip5_high += hit5->trim_left + hit5->start_amb_length;
- *hardclip3_low += hit3->trim_right + hit3->end_amb_length;
- debug13(printf("Recomputed clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_high += hit5->trim_left + start_amb_length(hit5);
+ *hardclip3_low += hit3->trim_right + end_amb_length(hit3);
+ debug15(printf("Recomputed clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
@@ -4221,45 +4195,41 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
if (*hardclip3_low < 0) {
*hardclip3_low = 0;
}
- debug13(printf("Positive clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug15(printf("Positive clip for ilength53 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
#endif
-
+
} else {
- debug13(printf("minus, ilength35 is longer. Clipping heads.\n"));
- overlap = common_left + common_right - 1;
- debug13(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
+ debug15(printf("minus, ilength35 is longer. Clipping heads.\n"));
+ debug15(overlap = common_left + common_right - 1);
+ debug15(printf("Overlap is %d = common_left %d + common_right %d - 1\n",
overlap,common_left,common_right));
clipdir = -1;
/* Want to clip 5 low and 3 high */
*hardclip5_low = ilength5_low + common_shift;
*hardclip3_high = ilength3_high - common_shift;
- debug13(printf("Overlap clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug15(printf("Overlap clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
- *hardclip5_low += hit5->trim_right + hit5->end_amb_length;
- *hardclip3_high += hit3->trim_left + hit3->start_amb_length;
- debug13(printf("Ambig clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low += hit5->trim_right + end_amb_length(hit5);
+ *hardclip3_high += hit3->trim_left + start_amb_length(hit3);
+ debug15(printf("Ambig clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
if (common_shift != 0) {
- if (test_hardclips(&common_genomicpos,*hardclip5_low,hit5,hit5->querylength_adj,
- *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset) == true) {
+ if (test_hardclips(&common_genomicpos,*hardclip5_low,hit5,*hardclip3_high,hit3,hit3->chroffset) == true) {
/* No adjustment needed, but need to recompute ilengths for shifted common_genomicpos */
} else {
- common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip5_low,hit5,hit5->querylength_adj,
- *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset);
- common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip5_low,hit5,hit5->querylength_adj,
- *hardclip3_high,hit3,hit3->querylength_adj,hit3->chroffset);
- debug13(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
+ common_genomicpos_right = adjust_hardclips_right(&shift_right,*hardclip5_low,hit5,*hardclip3_high,hit3,hit3->chroffset);
+ common_genomicpos_left = adjust_hardclips_left(&shift_left,*hardclip5_low,hit5,*hardclip3_high,hit3,hit3->chroffset);
+ debug15(printf("shift_right %d, shift_left %d\n",shift_right,shift_left));
if (shift_right == 0 && shift_left == 0) {
/* Try original position without a shift */
*hardclip5_low = ilength5_low /*+ common_shift*/;
*hardclip3_high = ilength3_high /*- common_shift*/;
- *hardclip5_low += hit5->trim_right + hit5->end_amb_length;
- *hardclip3_high += hit3->trim_left + hit3->start_amb_length;
- if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,hit3->querylength_adj,
- *hardclip5_high,hit5,hit5->querylength_adj,hit3->chroffset) == false) {
+ *hardclip5_low += hit5->trim_right + end_amb_length(hit5);
+ *hardclip3_high += hit3->trim_left + start_amb_length(hit3);
+ if (test_hardclips(&common_genomicpos,*hardclip3_low,hit3,*hardclip5_high,hit5,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
}
@@ -4274,26 +4244,26 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
}
}
- debug13(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
+ debug15(printf("New common point is %u\n",common_genomicpos - hit3->chroffset));
/* Recompute hardclips */
if (find_ilengths(&ilength5_low,&ilength5_high,hit5,common_genomicpos,hit5->chroffset) == false ||
find_ilengths(&ilength3_low,&ilength3_high,hit3,common_genomicpos,hit3->chroffset) == false) {
*hardclip5_low = *hardclip5_high = *hardclip3_low = *hardclip3_high = 0;
return 0;
} else if (ilength5_low > ilength3_high) {
- debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
ilength5_low -= 1;
} else {
- debug13(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Uneven: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
ilength3_high -= 1;
}
- debug13(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+ debug15(printf("Even: ilengths5: %d|%d. ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
*hardclip5_low = ilength5_low /*+ common_shift*/;
*hardclip3_high = ilength3_high /*- common_shift*/;
- *hardclip5_low += hit5->trim_right + hit5->end_amb_length;
- *hardclip3_high += hit3->trim_left + hit3->start_amb_length;
- debug13(printf("Recomputed clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ *hardclip5_low += hit5->trim_right + end_amb_length(hit5);
+ *hardclip3_high += hit3->trim_left + start_amb_length(hit3);
+ debug15(printf("Recomputed clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
}
@@ -4309,13 +4279,13 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
if (*hardclip3_high < 0) {
*hardclip3_high = 0;
}
- debug13(printf("Positive clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
+ debug15(printf("Positive clip for ilength35 minus is hardclip5 %d..%d and hardclip3 %d..%d\n",
*hardclip5_low,*hardclip5_high,*hardclip3_low,*hardclip3_high));
#endif
}
}
- debug13(printf("returning clipdir %d\n",clipdir));
+ debug15(printf("returning clipdir %d\n",clipdir));
return clipdir;
}
}
@@ -4363,7 +4333,7 @@ Stage3pair_free (Stage3pair_T *old) {
FREE_OUT(*old);
return;
}
-
+
static Overlap_T
Stage3pair_gene_overlap (Stage3pair_T this) {
@@ -4417,7 +4387,6 @@ Stage3pair_tally (Stage3pair_T this) {
#endif
-#if 0
static char complCode[128] = COMPLEMENT_LC;
static char *
@@ -4431,7 +4400,6 @@ make_complement_buffered (char *complement, char *sequence, unsigned int length)
complement[length] = '\0';
return complement;
}
-#endif
const Except_T Copy_Substring = { "Substring invalid during copy" };
@@ -4439,6 +4407,9 @@ const Except_T Copy_Substring = { "Substring invalid during copy" };
T
Stage3end_copy (T old) {
T new = (T) MALLOC_OUT(sizeof(*new));
+ List_T p;
+ Substring_T old_substring, new_substring;
+ Junction_T old_junction, new_junction;
debug0(printf("Copying Stage3end %p -> %p of type %s\n",
old,new,hittype_string(old->hittype)));
@@ -4446,6 +4417,7 @@ Stage3end_copy (T old) {
new->hittype = old->hittype;
new->genestrand = old->genestrand;
new->sarrayp = old->sarrayp;
+ new->gmap_source = old->gmap_source;
new->improved_by_gmap_p = old->improved_by_gmap_p;
new->chrnum = old->chrnum;
@@ -4483,7 +4455,7 @@ Stage3end_copy (T old) {
new->trim_left_splicep = old->trim_left_splicep;
new->trim_right_splicep = old->trim_right_splicep;
- new->penalties = old->penalties;
+ /* new->penalties = old->penalties; */
new->score_eventrim = old->score_eventrim;
new->gene_overlap = old->gene_overlap;
@@ -4494,15 +4466,7 @@ Stage3end_copy (T old) {
new->nmismatches_refdiff = old->nmismatches_refdiff;
new->nindels = old->nindels;
- new->indel_pos = old->indel_pos;
- new->indel_low = old->indel_low;
- if (old->deletion == NULL) {
- new->deletion = (char *) NULL;
- } else {
- new->deletion = (char *) CALLOC_OUT(strlen(old->deletion)+1,sizeof(char));
- strcpy(new->deletion,old->deletion);
- }
-
+
new->distance = old->distance;
new->shortexonA_distance = old->shortexonA_distance;
new->shortexonD_distance = old->shortexonD_distance;
@@ -4511,185 +4475,77 @@ Stage3end_copy (T old) {
new->gmap_cdna_direction = old->gmap_cdna_direction;
new->gmap_nintrons = old->gmap_nintrons;
new->sensedir = old->sensedir;
- new->sensedir_nonamb = old->sensedir_nonamb;
+ new->gmap_start_amb_length = old->gmap_start_amb_length;
+ new->gmap_end_amb_length = old->gmap_end_amb_length;
new->gmap_start_endtype = old->gmap_start_endtype;
new->gmap_end_endtype = old->gmap_end_endtype;
+ new->nsplices = old->nsplices;
- new->start_ambiguous_p = old->start_ambiguous_p;
- new->end_ambiguous_p = old->end_ambiguous_p;
-
- new->start_amb_length = old->start_amb_length;
- new->end_amb_length = old->end_amb_length;
- new->amb_length_donor = old->amb_length_donor;
- new->amb_length_acceptor = old->amb_length_acceptor;
-
- new->start_amb_prob = old->start_amb_prob;
- new->end_amb_prob = old->end_amb_prob;
- new->amb_prob_donor = old->amb_length_donor;
- new->amb_prob_acceptor = old->amb_length_acceptor;
-
- if ((new->nambcoords_donor = old->nambcoords_donor) == 0) {
- new->ambcoords_donor = (Univcoord_T *) NULL;
- new->amb_knowni_donor = (int *) NULL;
- new->amb_nmismatches_donor = (int *) NULL;
- new->amb_probs_donor = (double *) NULL;
- } else {
- new->ambcoords_donor = (Univcoord_T *) CALLOC_OUT(old->nambcoords_donor,sizeof(Univcoord_T));
- memcpy(new->ambcoords_donor,old->ambcoords_donor,old->nambcoords_donor*sizeof(Univcoord_T));
- new->amb_knowni_donor = (int *) CALLOC_OUT(old->nambcoords_donor,sizeof(int));
- memcpy(new->amb_knowni_donor,old->amb_knowni_donor,old->nambcoords_donor*sizeof(int));
- new->amb_nmismatches_donor = (int *) CALLOC_OUT(old->nambcoords_donor,sizeof(int));
- memcpy(new->amb_nmismatches_donor,old->amb_nmismatches_donor,old->nambcoords_donor*sizeof(int));
- new->amb_probs_donor = (double *) CALLOC_OUT(old->nambcoords_donor,sizeof(double));
- memcpy(new->amb_probs_donor,old->amb_probs_donor,old->nambcoords_donor*sizeof(double));
- }
-
- if ((new->nambcoords_acceptor = old->nambcoords_acceptor) == 0) {
- new->ambcoords_acceptor = (Univcoord_T *) NULL;
- new->amb_knowni_acceptor = (int *) NULL;
- new->amb_nmismatches_acceptor = (int *) NULL;
- new->amb_probs_acceptor = (double *) NULL;
- } else {
- new->ambcoords_acceptor = (Univcoord_T *) CALLOC_OUT(old->nambcoords_acceptor,sizeof(Univcoord_T));
- memcpy(new->ambcoords_acceptor,old->ambcoords_acceptor,old->nambcoords_acceptor*sizeof(Univcoord_T));
- new->amb_knowni_acceptor = (int *) CALLOC_OUT(old->nambcoords_acceptor,sizeof(int));
- memcpy(new->amb_knowni_acceptor,old->amb_knowni_acceptor,old->nambcoords_acceptor*sizeof(int));
- new->amb_nmismatches_acceptor = (int *) CALLOC_OUT(old->nambcoords_acceptor,sizeof(int));
- memcpy(new->amb_nmismatches_acceptor,old->amb_nmismatches_acceptor,old->nambcoords_acceptor*sizeof(int));
- new->amb_probs_acceptor = (double *) CALLOC_OUT(old->nambcoords_acceptor,sizeof(double));
- memcpy(new->amb_probs_acceptor,old->amb_probs_acceptor,old->nambcoords_acceptor*sizeof(double));
- }
-
- if (old->sensedir == SENSE_FORWARD) {
- new->start_ambcoords = new->ambcoords_donor;
- new->start_nambcoords = new->nambcoords_donor;
- new->start_amb_knowni = new->amb_knowni_donor;
- new->start_amb_nmismatches = new->amb_nmismatches_donor;
- new->start_amb_probs = new->amb_probs_donor;
-
- new->end_ambcoords = new->ambcoords_acceptor;
- new->end_nambcoords = new->nambcoords_acceptor;
- new->end_amb_knowni = new->amb_knowni_acceptor;
- new->end_amb_nmismatches = new->amb_nmismatches_acceptor;
- new->end_amb_probs = new->amb_probs_acceptor;
-
- } else {
- new->start_ambcoords = new->ambcoords_acceptor;
- new->start_nambcoords = new->nambcoords_acceptor;
- new->start_amb_knowni = new->amb_knowni_acceptor;
- new->start_amb_nmismatches = new->amb_nmismatches_acceptor;
- new->start_amb_probs = new->amb_probs_acceptor;
-
- new->end_ambcoords = new->ambcoords_donor;
- new->end_nambcoords = new->nambcoords_donor;
- new->end_amb_knowni = new->amb_knowni_donor;
- new->end_amb_nmismatches = new->amb_nmismatches_donor;
- new->end_amb_probs = new->amb_probs_donor;
- }
-
-
- new->nchimera_known = old->nchimera_known;
- new->nchimera_novel = old->nchimera_novel;
+ new->substrings_1toN = (List_T) NULL;
+ new->substrings_Nto1 = (List_T) NULL;
+ new->substrings_LtoH = (List_T) NULL;
- new->substring_LtoH = (List_T) NULL;
+ new->junctions_1toN = (List_T) NULL;
+ new->junctions_Nto1 = (List_T) NULL;
+ new->junctions_LtoH = (List_T) NULL;
if (old->hittype == GMAP) {
new->pairarray = Pairpool_copy_array(old->pairarray,old->npairs);
new->npairs = old->npairs;
+ new->cigar_tokens = Pair_tokens_copy(old->cigar_tokens);
+ new->gmap_intronp = old->gmap_intronp;
new->nsegments = old->nsegments;
- new->substring1 = (Substring_T) NULL;
- new->substring2 = (Substring_T) NULL;
- new->substring0 = (Substring_T) NULL;
-
} else {
new->pairarray = (struct Pair_T *) NULL;
new->npairs = 0;
+ new->cigar_tokens = (List_T) NULL;
+ new->gmap_intronp = false;
new->nsegments = 0;
- new->substring1 = Substring_copy(old->substring1);
- new->substring2 = Substring_copy(old->substring2);
- new->substring0 = Substring_copy(old->substring0);
+ for (p = old->substrings_1toN; p != NULL; p = List_next(p)) {
+ old_substring = (Substring_T) List_head(p);
+ new_substring = Substring_copy(old_substring);
+ new->substrings_1toN = List_push(new->substrings_1toN,(void *) new_substring);
+ }
+
+ for (p = old->junctions_1toN; p != NULL; p = List_next(p)) {
+ old_junction = (Junction_T) List_head(p);
+ new_junction = Junction_copy(old_junction);
+ new->junctions_1toN = List_push(new->junctions_1toN,(void *) new_junction);
+ }
- if (new->plusp == true) {
- if (new->substring2 != NULL) {
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring2);
- }
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring1);
- if (new->substring0 != NULL) {
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring0);
+ new->substrings_Nto1 = List_copy(new->substrings_1toN); /* Before reversal of 1toN */
+ new->junctions_Nto1 = List_copy(new->junctions_1toN); /* Before reversal of 1toN */
+
+ /* Reversals to handle builds of 1toN */
+ new->substrings_1toN = List_reverse(new->substrings_1toN);
+ new->junctions_1toN = List_reverse(new->junctions_1toN);
+
+ if (old->chrnum == 0) {
+ /* Translocation */
+ if (old->sensedir == SENSE_FORWARD) {
+ new->substrings_LtoH = List_copy(new->substrings_1toN);
+ new->junctions_LtoH = List_copy(new->junctions_1toN);
+ } else if (old->sensedir == SENSE_ANTI) {
+ new->substrings_LtoH = List_copy(new->substrings_Nto1);
+ new->junctions_LtoH = List_copy(new->junctions_Nto1);
+ } else {
+ abort();
}
+
} else {
- if (new->substring0 != NULL) {
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring0);
- }
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring1);
- if (new->substring2 != NULL) {
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring2);
+ if (old->plusp == true) {
+ new->substrings_LtoH = List_copy(new->substrings_1toN);
+ new->junctions_LtoH = List_copy(new->junctions_1toN);
+ } else {
+ new->substrings_LtoH = List_copy(new->substrings_Nto1);
+ new->junctions_LtoH = List_copy(new->junctions_Nto1);
}
}
- }
-
- if (old->substring_donor == NULL) {
- new->substring_donor = NULL;
- } else if (old->substring_donor == old->substring1) {
- new->substring_donor = new->substring1;
- } else if (old->substring_donor == old->substring2) {
- new->substring_donor = new->substring2;
- } else {
- fprintf(stderr,"substring_donor for type %s is not NULL, substring1, or substring2\n",
- hittype_string(old->hittype));
- fprintf(stderr,"substring_donor %p\n",old->substring_donor);
- fprintf(stderr,"substring1 %p\n",old->substring1);
- fprintf(stderr,"substring2 %p\n",old->substring2);
- Except_raise(&Copy_Substring, __FILE__, __LINE__);
- }
-
- if (old->substring_acceptor == NULL) {
- new->substring_acceptor = NULL;
- } else if (old->substring_acceptor == old->substring1) {
- new->substring_acceptor = new->substring1;
- } else if (old->substring_acceptor == old->substring2) {
- new->substring_acceptor = new->substring2;
- } else {
- fprintf(stderr,"substring_acceptor for type %s is not NULL, substring1, or substring2\n",
- hittype_string(old->hittype));
- fprintf(stderr,"substring_acceptor %p\n",old->substring_acceptor);
- fprintf(stderr,"substring1 %p\n",old->substring1);
- fprintf(stderr,"substring2 %p\n",old->substring2);
- Except_raise(&Copy_Substring, __FILE__, __LINE__);
- }
-
- if (old->substringD == NULL) {
- new->substringD = NULL;
- } else if (old->substringD == old->substring0) {
- new->substringD = new->substring0;
- } else if (old->substringD == old->substring2) {
- new->substringD = new->substring2;
- } else {
- fprintf(stderr,"substringD for type %s is not NULL, substring0, or substring2\n",
- hittype_string(old->hittype));
- fprintf(stderr,"substringD %p\n",old->substringD);
- fprintf(stderr,"substring0 %p\n",old->substring0);
- fprintf(stderr,"substring2 %p\n",old->substring2);
- Except_raise(&Copy_Substring, __FILE__, __LINE__);
- }
-
- if (old->substringA == NULL) {
- new->substringA = NULL;
- } else if (old->substringA == old->substring0) {
- new->substringA = new->substring0;
- } else if (old->substringA == old->substring2) {
- new->substringA = new->substring2;
- } else {
- fprintf(stderr,"substringA for type %s is not NULL, substring0, or substring2\n",
- hittype_string(old->hittype));
- fprintf(stderr,"substringA %p\n",old->substringA);
- fprintf(stderr,"substring0 %p\n",old->substring0);
- fprintf(stderr,"substring2 %p\n",old->substring2);
- Except_raise(&Copy_Substring, __FILE__, __LINE__);
+ assert(Substring_querystart(List_head(new->substrings_1toN)) <= Substring_querystart(List_head(new->substrings_Nto1)));
}
new->paired_usedp = old->paired_usedp;
@@ -4706,9 +4562,13 @@ Stage3end_copy (T old) {
static int
compute_circularpos (int *alias, T hit) {
int circularpos;
+ List_T p;
+ Substring_T substring;
+
- debug12(printf("Computing circularpos on hit at %u..%u with trim left %d and trim right %d\n",
- hit->genomicstart - hit->chroffset,hit->genomicend - hit->chroffset,hit->trim_left,hit->trim_right));
+ debug12(printf("Computing circularpos on hit at %u..%u, plusp %d, with trim left %d and trim right %d\n",
+ hit->genomicstart - hit->chroffset,hit->genomicend - hit->chroffset,
+ hit->plusp,hit->trim_left,hit->trim_right));
if (circularp[hit->chrnum] == false) {
debug12(printf("Chromosome #%d is not circular\n",hit->chrnum));
/* This also handles hit->chrnum == 0, where translocation cannot be circular */
@@ -4716,8 +4576,10 @@ compute_circularpos (int *alias, T hit) {
return -1;
} else if (hit->hittype == GMAP) {
+ debug12(printf("Pair circularpos is %d\n",Pair_circularpos(&(*alias),hit->pairarray,hit->npairs,hit->chrlength,
+ hit->plusp,hit->querylength)));
return Pair_circularpos(&(*alias),hit->pairarray,hit->npairs,hit->chrlength,
- hit->plusp,hit->querylength_adj);
+ hit->plusp,hit->querylength);
} else if (hit->plusp == true) {
if (
@@ -4746,18 +4608,14 @@ compute_circularpos (int *alias, T hit) {
} else {
*alias = 0;
- if ((circularpos = Substring_circularpos(hit->substring0)) > 0) {
- debug12(printf("Returning circularpos %d from substring0 (plus)\n",circularpos));
- return circularpos;
- } else if ((circularpos = Substring_circularpos(hit->substring1)) > 0) {
- debug12(printf("Returning circularpos %d from substring1 (plus)\n",circularpos));
- return circularpos;
- } else if ((circularpos = Substring_circularpos(hit->substring2)) > 0) {
- debug12(printf("Returning circularpos %d from substring2 (plus)\n",circularpos));
- return circularpos;
- } else {
- return -1;
+ for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if ((circularpos = Substring_circularpos(substring)) > 0) {
+ debug12(printf("Returning circularpos %d from substring (plus)\n",circularpos));
+ return circularpos;
+ }
}
+ return -1;
}
} else {
@@ -4780,7 +4638,7 @@ compute_circularpos (int *alias, T hit) {
#else
hit->high <= hit->chroffset + hit->chrlength
#endif
- ) {
+ ) {
/* All of read after trimming is in circular proper */
debug12(printf("Soft clip of %d on left avoids circularization\n",hit->trim_left));
debug12(printf("All of read after trimming is in circular proper\n"));
@@ -4789,108 +4647,1130 @@ compute_circularpos (int *alias, T hit) {
} else {
*alias = 0;
- if ((circularpos = Substring_circularpos(hit->substring2)) > 0) {
- debug12(printf("Returning circularpos %d from substring2 (minus)\n",circularpos));
- return circularpos;
- } else if ((circularpos = Substring_circularpos(hit->substring1)) > 0) {
- debug12(printf("Returning circularpos %d from substring1 (minus)\n",circularpos));
- return circularpos;
- } else if ((circularpos = Substring_circularpos(hit->substring0)) > 0) {
- debug12(printf("Returning circularpos %d from substring0 (minus)\n",circularpos));
- return circularpos;
- } else {
- return -1;
+ for (p = hit->substrings_Nto1; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if ((circularpos = Substring_circularpos(substring)) > 0) {
+ debug12(printf("Returning circularpos %d from substring (minus)\n",circularpos));
+ return circularpos;
+ }
}
+ return -1;
}
}
}
+/* Releases everything Stage3end_new_substrings has accumulated so far
+   when it has to abandon construction and return NULL.  Every entry of
+   *substrings is freed, except left_ambig, which stays owned by the
+   calling procedure; for left_ambig, the splice junction that was
+   pushed onto the head of *junctions is popped and freed instead, so
+   *junctions ends up pointing at the list the caller passed in.
+   Finally the list cells of *substrings are freed. */
+static void
+free_partial_substrings (List_T *substrings, List_T *junctions, Substring_T left_ambig) {
+  List_T p;
+  Substring_T substring;
+  Junction_T junction;
+
+  for (p = *substrings; p != NULL; p = List_next(p)) {
+    substring = (Substring_T) List_head(p);
+    if (substring == left_ambig) {
+      /* left_ambig freed by calling procedure.  Need to free junction created for left_ambig. */
+      *junctions = List_pop(*junctions,(void **) &junction);
+      Junction_free(&junction);
+    } else {
+      Substring_free(&substring);
+    }
+  }
+  List_free(substrings);
+
+  return;
+}
+
+
+/* Builds a hit of type SUBSTRINGS from a chain of aligned segments.
+
+   endpoints        query positions: the head is the first querystart and
+                    each following element is the queryend of one segment
+   lefts            one genomic "left" coordinate per segment
+   nmismatches_list one entry per segment; a negative entry means the
+                    count is obtained from Genome_count_mismatches_substring
+   junctions        one junction between each pair of adjacent segments;
+                    a junction with non-zero Junction_adj is scored as an
+                    indel, and a negative adj (insertion) advances querystart
+   left_ambig,
+   right_ambig      optional ambiguous end substrings (may be NULL); each
+                    one present is added to the substrings together with a
+                    newly created splice junction of distance 0
+
+   Returns NULL when a segment extends outside chroffset..chrhigh or when
+   Substring_new fails.  In those cases the substrings created here (and
+   the junction created for left_ambig) are freed before returning;
+   left_ambig is left for the calling procedure to free.  On success,
+   *found_score is set to the score of the new hit. */
 T
-Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Compress_T query_compress,
-                     bool plusp, int genestrand, bool first_read_p,
-                     Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
-                     Chrpos_T chrlength, bool sarrayp) {
+Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
+#ifdef LARGE_GENOMES
+                          Uint8list_T lefts,
+#else
+                          Uintlist_T lefts,
+#endif
+                          Intlist_T nmismatches_list, List_T junctions, int querylength,
+                          Compress_T query_compress,
+                          Substring_T right_ambig, Substring_T left_ambig,
+                          bool plusp, int genestrand, int sensedir, bool first_read_p,
+                          Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+                          Chrpos_T chrlength, bool sarrayp) {
   T new;
-  Substring_T substring;
-  Univcoord_T genomicstart, genomicend;
+  Univcoord_T genomicstart, genomicend, genomicstart_adj, genomicend_adj,
+    alignstart, alignend, alignstart_trim, alignend_trim;
+  int querylength_trimmed = 0;
+  int querystart, queryend;
+  Univcoord_T left;
+  Intlist_T r, x;
+#ifdef LARGE_GENOMES
+  Uint8list_T q;
+#else
+  Uintlist_T q;
+#endif
+  Substring_T substring, substring1, substringN;
+  Junction_T junction, junction_ambig = NULL;
+  Junctiontype_T type;
+  List_T substrings = NULL, p, j;
+  bool trim_left_p = false, trim_right_p = false;
+  int outofbounds_start = 0, outofbounds_end = 0;
+  int adj = 0, adj0;		/* deletions - insertions */
+  int nmismatches_whole = 0, nmismatches, indel_score = 0, nindels = 0;
+  int nmismatches_bothdiff = 0;
+
+
+  /* NOTE(review): the debugging and assert statements below use the
+     Uintlist_* accessors on lefts even when LARGE_GENOMES makes it a
+     Uint8list_T -- confirm whether they are ever compiled in that
+     configuration */
+  debug0(printf("%s read: Entered Stage3end_new_substrings at left %u, with plusp %d, sensedir %d, and endpoints %s\n",
+		first_read_p ? "First" : "Second",Uintlist_head(lefts),plusp,sensedir,Intlist_to_string(endpoints)));
+  debug0(printf("There are %d endpoints, %d lefts, %d nmismatches, and %d junctions\n",
+		Intlist_length(endpoints),Uintlist_length(lefts),Intlist_length(nmismatches_list),List_length(junctions)));
+  debug0(printf("Ambig left %p, right %p\n",left_ambig,right_ambig));
+  debug0(printf("Endpoints: %s\n",Intlist_to_string(endpoints)));
+  debug0(printf("Lefts: %s\n",Uintlist_to_string(lefts)));
+  debug0(printf("Mismatches: %s\n",Intlist_to_string(nmismatches_list)));
+  assert(Uintlist_length(lefts) == Intlist_length(endpoints) - 1);
+  assert(Intlist_length(nmismatches_list) == Intlist_length(endpoints) - 1);
+  assert(List_length(junctions) == Intlist_length(endpoints) - 2);
+
+
+#ifdef DEBUG0
+  for (p = junctions; p != NULL; p = List_next(p)) {
+    Junction_print((Junction_T) List_head(p));
+  }
+  printf("\n");
+#endif
+
+  querystart = Intlist_head(endpoints);
   if (plusp == true) {
-    if ((genomicend = left + genomiclength) > chrhigh) {
-      return (T) NULL;
+    j = junctions;		/* Put here before we handle left_ambig */
+    if (left_ambig != NULL) {
+      substrings = List_push(substrings,(void *) left_ambig);
+      junctions = List_push(junctions,(void *) Junction_new_splice(/*distance*/0,sensedir,
+								   Substring_amb_donor_prob(left_ambig),
+								   Substring_amb_acceptor_prob(left_ambig)));
     } else {
+      trim_left_p = true;
+    }
+
+    /* Add querypos to get alignstart/alignend */
+    for (q = lefts, x = nmismatches_list, r = Intlist_next(endpoints); q != NULL;
+#ifdef LARGE_GENOMES
+	 q = Uint8list_next(q),
+#else
+	 q = Uintlist_next(q),
+#endif
+	 x = Intlist_next(x), r = Intlist_next(r), j = List_next(j)) {
+      queryend = Intlist_head(r);
+#ifdef LARGE_GENOMES
+      left = Uint8list_head(q);
+#else
+      left = Uintlist_head(q);
+#endif
+      debug0(printf("Working on querystart %d..queryend %d at left %u\n",querystart,queryend,left));
+
       genomicstart = left;
+      genomicend = left + querylength;
+      /* The _adj values are computed but not read anywhere else in this procedure */
+      genomicstart_adj = genomicstart + adj;
+      genomicend_adj = genomicend + adj;
+
+      alignstart = genomicstart + querystart;
+      alignend = genomicstart + queryend;
+
+      if (genomicstart < chroffset && genomicend > chrhigh) {
+	/* Out of bounds on both sides */
+	/* Free substrings from earlier iterations before giving up */
+	free_partial_substrings(&substrings,&junctions,left_ambig);
+	return (T) NULL;
+
+      } else if (genomicstart < chroffset) {
+	outofbounds_start = chroffset - genomicstart;
+	outofbounds_end = genomicend - chroffset;
+	debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+	free_partial_substrings(&substrings,&junctions,left_ambig);
+	return (T) NULL;
+#if 0
+	/* Could consider this for the lowest substring */
+	if (outofbounds_start > outofbounds_end) {
+	  /* Consider high part to be out of bounds and keep existing chromosome */
+	  outofbounds_start = 0;
+	  return (T) NULL;
+	} else {
+	  /* Consider low part to be out of bounds and stay in this chromosome */
+	  /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+	  outofbounds_end = 0;
+	}
+#endif
+      } else if (genomicend > chrhigh) {
+	outofbounds_start = chrhigh - genomicstart;
+	outofbounds_end = genomicend - chrhigh;
+	debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+	free_partial_substrings(&substrings,&junctions,left_ambig);
+	return (T) NULL;
+#if 0
+	/* Could consider this for the highest substring */
+	if (outofbounds_start > outofbounds_end) {
+	  /* Consider high part to be out of bounds and keep existing chromosome */
+	  outofbounds_start = 0;
+	} else if (++chrnum > nchromosomes) {
+	  debug0(printf("Returning NULL from Stage3end_new_substrings\n"));
+	  return (T) NULL;
+	} else {
+	  /* Consider low part to be out of bounds and move to next chromosome */
+	  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+	  outofbounds_end = 0;
+	}
+#endif
+      }
+
+      if ((nmismatches = Intlist_head(x)) < 0) {
+	nmismatches = Genome_count_mismatches_substring(query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
+							/*plusp*/true,genestrand,first_read_p);
+	debug0(printf("nmismatches %d from genome\n",nmismatches));
+      }
+      nmismatches_whole += nmismatches;
+      debug0(printf("nmismatches %d from sarray\n",nmismatches));
+      /* Last segment with no ambiguous end after it: allow trimming on the right */
+#ifdef LARGE_GENOMES
+      if (Uint8list_next(q) == NULL && right_ambig == NULL) {
+#else
+      if (Uintlist_next(q) == NULL && right_ambig == NULL) {
+#endif
+	trim_right_p = true;
+      }
+      if ((substring = Substring_new(/*nmismatches_whole*/nmismatches,chrnum,chroffset,chrhigh,chrlength,
+				     query_compress,/*start_endtype*/END,/*end_endtype*/END,
+				     querystart,queryend,querylength,alignstart,alignend,
+				     /*genomiclength*/querylength,
+				     /*exactp*/Intlist_head(x) == 0 ? true : false,plusp,genestrand,first_read_p,
+				     trim_left_p,trim_right_p,outofbounds_start,outofbounds_end,
+				     /*minlength*/0)) == NULL) {
+	/* Don't know how to fix the junctions */
+	debug0(printf("Don't know how to fix the junctions, so returning NULL from Stage3end_new_substrings\n"));
+	free_partial_substrings(&substrings,&junctions,left_ambig);
+	return (T) NULL;
+      } else {
+	substrings = List_push(substrings,(void *) substring);
+	nmismatches_bothdiff += Substring_nmismatches_bothdiff(substring);
+	querylength_trimmed += Substring_querylength(substring);
+      }
+
+      /* Prepare for next iteration */
+      querystart = queryend;
+      if (j != NULL) {
+	junction = (Junction_T) List_head(j);
+	if ((adj0 = Junction_adj(junction)) != 0) {
+	  adj += adj0;
+	  indel_score += indel_penalty_middle;
+	  nindels += Junction_nindels(junction);
+	  if (adj0 < 0) {
+	    querystart -= adj0;	/* Insertion */
+	  }
+	}
+      }
+      trim_left_p = false;
     }
+
   } else {
-    if ((genomicstart = left + genomiclength) > chrhigh) {
-      return (T) NULL;
+    j = junctions;		/* Put here before we handle left_ambig */
+    if (left_ambig != NULL) {
+      substrings = List_push(substrings,(void *) left_ambig);
+      junctions = List_push(junctions,(void *) Junction_new_splice(/*distance*/0,sensedir,
+								   Substring_amb_donor_prob(left_ambig),
+								   Substring_amb_acceptor_prob(left_ambig)));
     } else {
+      trim_right_p = true;
+    }
+
+    /* Subtract querypos to get alignstart/alignend */
+    for (q = lefts, x = nmismatches_list, r = Intlist_next(endpoints); q != NULL;
+#ifdef LARGE_GENOMES
+	 q = Uint8list_next(q),
+#else
+	 q = Uintlist_next(q),
+#endif
+	 x = Intlist_next(x), r = Intlist_next(r), j = List_next(j)) {
+      queryend = Intlist_head(r);
+#ifdef LARGE_GENOMES
+      left = Uint8list_head(q);
+#else
+      left = Uintlist_head(q);
+#endif
+      debug0(printf("Working on querystart %d..queryend %d at left %u\n",querystart,queryend,left));
+
       genomicend = left;
+      genomicstart = left + querylength;
+      /* The _adj values are computed but not read anywhere else in this procedure */
+      genomicend_adj = genomicend - adj;
+      genomicstart_adj = genomicstart - adj;
+
+      alignstart = genomicstart - (querylength - queryend);
+      alignend = genomicstart - (querylength - querystart);
+
+      if (genomicend < chroffset && genomicstart > chrhigh) {
+	/* Out of bounds on both sides */
+	/* Free substrings from earlier iterations before giving up */
+	free_partial_substrings(&substrings,&junctions,left_ambig);
+	return (T) NULL;
+
+      } else if (genomicend < chroffset) {
+	outofbounds_end = chroffset - genomicend;
+	outofbounds_start = genomicstart - chroffset;
+	debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+	free_partial_substrings(&substrings,&junctions,left_ambig);
+	return (T) NULL;
+#if 0
+	/* Could consider this for the lowest substring */
+	if (outofbounds_end > outofbounds_start) {
+	  /* Consider high part to be out of bounds and keep existing chromosome */
+	  outofbounds_end = 0;
+	} else {
+	  /* Consider low part to be out of bounds and stay in this chromosome */
+	  /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+	  outofbounds_start = 0;
+	}
+#endif
+
+      } else if (genomicstart > chrhigh) {
+	outofbounds_end = chrhigh - genomicend;
+	outofbounds_start = genomicstart - chrhigh;
+	debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+	free_partial_substrings(&substrings,&junctions,left_ambig);
+	return (T) NULL;
+#if 0
+	/* Could consider this for the highest substring */
+	if (outofbounds_end > outofbounds_start) {
+	  /* Consider high part to be out of bounds and keep existing chromosome */
+	  outofbounds_end = 0;
+	} else if (++chrnum > nchromosomes) {
+	  debug0(printf("Returning NULL from Stage3end_new_substrings\n"));
+	  return (T) NULL;
+	} else {
+	  /* Consider low part to be out of bounds and move to next chromosome */
+	  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+	  outofbounds_start = 0;
+	}
+#endif
+      }
+
+      if ((nmismatches = Intlist_head(x)) < 0) {
+	nmismatches = Genome_count_mismatches_substring(query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
+							/*plusp*/false,genestrand,first_read_p);
+	debug0(printf("nmismatches %d from genome\n",nmismatches));
+      }
+      nmismatches_whole += nmismatches;
+      debug0(printf("nmismatches %d from sarray\n",nmismatches));
+      /* Last segment with no ambiguous end after it: allow trimming on the left */
+#ifdef LARGE_GENOMES
+      if (Uint8list_next(q) == NULL && right_ambig == NULL) {
+#else
+      if (Uintlist_next(q) == NULL && right_ambig == NULL) {
+#endif
+	trim_left_p = true;
+      }
+      if ((substring = Substring_new(/*nmismatches_whole*/nmismatches,chrnum,chroffset,chrhigh,chrlength,
+				     query_compress,/*start_endtype*/END,/*end_endtype*/END,
+				     /*querystart*/querylength - queryend,/*queryend*/querylength - querystart,querylength,
+				     alignstart,alignend,/*genomiclength*/querylength,
+				     /*exactp*/Intlist_head(x) == 0 ? true : false,plusp,genestrand,first_read_p,
+				     trim_left_p,trim_right_p,outofbounds_start,outofbounds_end,
+				     /*minlength*/0)) == NULL) {
+	/* Don't know how to fix the junctions */
+	debug0(printf("Don't know how to fix the junctions, so returning NULL from Stage3end_new_substrings\n"));
+	free_partial_substrings(&substrings,&junctions,left_ambig);
+	return (T) NULL;
+      } else {
+	substrings = List_push(substrings,(void *) substring);
+	nmismatches_bothdiff += Substring_nmismatches_bothdiff(substring);
+	querylength_trimmed += Substring_querylength(substring);
+      }
+
+      /* Prepare for next iteration */
+      querystart = queryend;
+      if (j != NULL) {
+	junction = (Junction_T) List_head(j);
+	if ((adj0 = Junction_adj(junction)) != 0) {
+	  adj += adj0;
+	  indel_score += indel_penalty_middle;
+	  nindels += Junction_nindels(junction);
+	  if (adj0 < 0) {
+	    querystart -= adj0;	/* Insertion */
+	  }
+	}
+      }
+      trim_right_p = false;
     }
   }
-  if ((substring = Substring_new(/*nmismatches*/0,chrnum,chroffset,chrhigh,chrlength,left,
-				 genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
-				 query_compress,/*start_endtype*/END,/*end_endtype*/END,
-				 /*querystart*/0,/*queryend*/genomiclength,/*querylength*/genomiclength,
-				 /*alignstart*/genomicstart,/*alignend*/genomicend,
-				 genomiclength,/*extraleft*/0,/*extraright*/0,/*exactp*/true,
-				 plusp,genestrand,first_read_p,/*trim_left_p*/false,/*trim_right_p*/false,
-				 /*minlength*/0)) == NULL) {
-    return (T) NULL;
-
-  } else {
-    new = (T) MALLOC_OUT(sizeof(*new));
-    debug0(printf("Stage3end_new_exact %p: left %llu, chrnum %d\n",new,(unsigned long long) left,chrnum));
+  if (right_ambig != NULL) {
+    /* Append right_ambig and a matching splice junction at the tail of the junctions */
+    substrings = List_push(substrings,(void *) right_ambig);
+    junctions = List_reverse(junctions);
+    junctions = List_push(junctions,(void *) Junction_new_splice(/*distance*/0,sensedir,
+								 Substring_amb_donor_prob(right_ambig),
+								 Substring_amb_acceptor_prob(right_ambig)));
+    junctions = List_reverse(junctions);
+  }
-    new->substring1 = substring;
-    new->substring2 = (Substring_T) NULL;
-    new->substring0 = (Substring_T) NULL;
-    new->substring_donor = new->substring_acceptor = (Substring_T) NULL;
-    new->substringD = new->substringA = (Substring_T) NULL;
-    new->substring_LtoH = List_push(NULL,(void *) new->substring1);
+#ifdef DEBUG0
+  printf("NEW JUNCTIONS\n");
+  for (p = junctions; p != NULL; p = List_next(p)) {
+    Junction_print(List_head(p));
+  }
+  printf("\n");
+#endif
-    new->pairarray = (struct Pair_T *) NULL;
+  new = (T) MALLOC(sizeof(*new));
+  new->hittype = SUBSTRINGS;
-    new->deletion = (char *) NULL;
-    new->querylength_adj = new->querylength = genomiclength;
-    new->genomicstart = genomicstart;
-    new->genomicend = genomicend;
+  new->pairarray = (struct Pair_T *) NULL;
+  new->cigar_tokens = (List_T) NULL;
+  new->gmap_intronp = false;
-    if (genomicstart < genomicend) {
-      new->low = genomicstart;
-      new->high = genomicend;
-    } else {
-      new->low = genomicend;
-      new->high = genomicstart;
-    }
-    new->genomiclength = new->high - new->low;
-    new->guided_insertlength = 0U;
-    debug0(printf("Assigned %llu to low and %llu to high\n",(unsigned long long) new->low,(unsigned long long) new->high));
+  new->querylength = querylength;
+  new->querylength_adj = querylength + adj;
+  new->substrings_LtoH = substrings;
+  new->substrings_1toN = List_copy(substrings); /* Takes over as primary holder of substrings */
+  new->substrings_Nto1 = List_copy(substrings);
-    new->hittype = EXACT;
-    new->genestrand = genestrand;
-    new->sarrayp = sarrayp;
-    new->improved_by_gmap_p = false;
+  new->junctions_LtoH = junctions;
+  new->junctions_1toN = List_copy(junctions); /* Takes over as primary holder of junctions */
+  new->junctions_Nto1 = List_copy(junctions);
-    new->chrnum = new->effective_chrnum = chrnum;
-    new->other_chrnum = 0;
-    new->chroffset = chroffset;
-    new->chrhigh = chrhigh;
-    new->chrlength = chrlength;
-    new->plusp = plusp;
-    new->sensedir = new->sensedir_nonamb = SENSE_NULL;
+  /* Note differences between substrings and junctions.  Substrings
+     were pushed onto lists above, and junctions were created by the
+     caller, so they are originally in opposite orders */
 #if 0
-    new->mapq_loglik = Substring_mapq_loglik(substring);
-    new->mapq_score = 0;
+  if (plusp == true) {
+    new->substrings_LtoH = List_reverse(new->substrings_LtoH);
+    new->substrings_1toN = List_reverse(new->substrings_1toN);
+    new->junctions_Nto1 = List_reverse(new->junctions_Nto1);
+  } else {
+    new->junctions_LtoH = List_reverse(new->junctions_LtoH);
+    new->substrings_Nto1 = List_reverse(new->substrings_Nto1);
+    new->junctions_1toN = List_reverse(new->junctions_1toN);
+  }
+#else
+  /* Correct for both plus and minus */
+  new->substrings_LtoH = List_reverse(new->substrings_LtoH);
+  if (plusp == true) {
+    new->substrings_1toN = List_reverse(new->substrings_1toN);
+    new->junctions_Nto1 = List_reverse(new->junctions_Nto1);
+  } else {
+    new->substrings_Nto1 = List_reverse(new->substrings_Nto1);
+    new->junctions_1toN = List_reverse(new->junctions_1toN);
+  }
+#endif
+
+#ifdef DEBUG0
+  printf("NEW SUBSTRINGS\n");
+  for (p = new->substrings_1toN; p != NULL; p = List_next(p)) {
+    substring = List_head(p);
+    printf("%d..%d\n",Substring_querystart(substring),Substring_queryend(substring));
+  }
+  printf("\n");
+#endif
+
+
+  /* First and last substrings in query order supply the overall coordinates and trims */
+  substring1 = (Substring_T) List_head(new->substrings_1toN);
+  substringN = (Substring_T) List_head(new->substrings_Nto1);
+
+  genomicstart = Substring_genomicstart(substring1);
+  genomicend = Substring_genomicend(substringN); /* DOESN'T WORK FOR AMBIGUOUS */
+  new->genomicstart = genomicstart;
+  new->genomicend = genomicend;
+
+  if (genomicstart < genomicend) {
+    new->low = genomicstart;
+    new->high = genomicend;
+  } else {
+    new->low = genomicend;
+    new->high = genomicstart;
+  }
+  new->genomiclength = new->high - new->low;
+  new->guided_insertlength = 0U;
+
+  new->genestrand = genestrand;
+  new->sarrayp = sarrayp;
+  new->gmap_source = GMAP_NOT_APPLICABLE;
+  new->improved_by_gmap_p = false;
+
+  new->chrnum = new->effective_chrnum = chrnum;
+  new->other_chrnum = 0;
+  new->chroffset = chroffset;
+  new->chrhigh = chrhigh;
+  new->chrlength = chrlength;
+  new->plusp = plusp;
+  new->sensedir = sensedir;
+
+  new->nindels = nindels;
+  new->nmismatches_whole = nmismatches_whole;
+  new->nmismatches_bothdiff = nmismatches_bothdiff; /* Trimmed */
+  /* new->nmismatches_refdiff = 0; */
+  new->ntscore = nmismatches_whole + indel_score;
+  new->score = nmismatches_whole + indel_score; /* Want untrimmed */
+  new->nmatches = querylength - nmismatches_whole;
+  new->nmatches_posttrim = querylength_trimmed - nmismatches_whole;
+
+  new->trim_left = Substring_trim_left(substring1);
+  new->trim_right = Substring_trim_right(substringN);
+  new->trim_left_splicep = Substring_trim_left_splicep(substring1);
+  new->trim_right_splicep = Substring_trim_right_splicep(substringN);
+  debug0(printf("substrings trim_left %d, trim_right %d\n",new->trim_left,new->trim_right));
+
+  /* new->penalties = 0; */
+
+  /* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
+  new->tally = -1L;
+  *found_score = new->score;
+
+  /* Count only the junctions that are splices */
+  new->nsplices = 0;
+  for (p = junctions; p != NULL; p = List_next(p)) {
+    junction = (Junction_T) List_head(p);
+    if (Junction_type(junction) == SPLICE_JUNCTION) {
+      new->nsplices += 1;
+    }
+  }
+
+  new->distance = 0U;
+  new->shortexonA_distance = new->shortexonD_distance = 0U;
+
+  new->paired_usedp = false;
+  new->paired_seenp = false;
+  new->concordantp = false;
+
+  new->circularpos = compute_circularpos(&new->alias,new);
+
+  debug0(printf("Returning %p from Stage3end_new_substrings with found_score %d\n",new,*found_score));
+  return new;
+}
+
+
+/* Clamped coordinate arithmetic.  add_bounded yields x + plusterm, or
+   highbound - 1 when that sum would reach highbound.  subtract_bounded
+   yields x - minusterm, or lowbound when x < lowbound + minusterm; the
+   test is written as an addition so that the subtraction is evaluated
+   only when its result stays at or above lowbound.  Every argument is
+   parenthesized so that expressions containing low-precedence operators
+   expand correctly.  Arguments are evaluated more than once, so they
+   must not have side effects. */
+#define add_bounded(x,plusterm,highbound) (((x) + (plusterm) >= (highbound)) ? ((highbound) - 1) : ((x) + (plusterm)))
+#define subtract_bounded(x,minusterm,lowbound) (((x) < (lowbound) + (minusterm)) ? (lowbound) : ((x) - (minusterm)))
+
+
+/* Modified from run_gmap_plus in sarray-read.c */
+
+/* Pushes one pair per position of a segment onto pairs and returns the
+   extended list.  Position i of the segment pairs query character
+   queryuc_ptr[querypos + i] with genomic characters gsequence_orig[i]
+   and gsequence_alt[i] at genomic position genomepos + i.  A position
+   is marked MATCH_COMP when the query character equals either genomic
+   character, and MISMATCH_COMP otherwise. */
+static List_T
+push_plus_segment_pairs (List_T pairs, Pairpool_T pairpool, char *queryuc_ptr,
+			 char *gsequence_orig, char *gsequence_alt,
+			 int querypos, Chrpos_T genomepos, int seglength) {
+  int offset;
+  char c, g, g_alt, comp;
+
+  for (offset = 0; offset < seglength; offset++) {
+    c = queryuc_ptr[querypos];
+    g = gsequence_orig[offset];
+    g_alt = gsequence_alt[offset];
+    comp = (g == c || g_alt == c) ? MATCH_COMP : MISMATCH_COMP;
+    debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+    pairs = Pairpool_push(pairs,pairpool,querypos,genomepos,
+			  /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+			  /*dynprogindex*/0);
+    querypos++;
+    genomepos++;
+  }
+
+  return pairs;
+}
+
+
+/* Re-aligns the substrings of a plus-strand hit with Stage3_compute and
+   wraps the result with Stage3end_new_gmap.  Each ambiguous coordinate
+   of an ambiguous first (last) substring contributes one alternative
+   start (end) path; the non-ambiguous substrings form the main list of
+   pairs.  Returns the new hit, or NULL when Stage3_compute or
+   Stage3end_new_gmap returns NULL.
+
+   NOTE(review): the first element of the main pair list is dereferenced
+   unconditionally, so this presumably relies on the hit having at least
+   one non-ambiguous substring of non-zero length -- confirm against
+   callers. */
+T
+Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
+				    int genestrand, bool first_read_p,
+				    int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+				    Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
+  T hit = (T) NULL;
+  List_T all_stage2_starts = (List_T) NULL, all_stage2_ends = (List_T) NULL;
+  List_T stage2pairs, path, p, startp;
+  Substring_T substring, head_substring, tail_substring, first_ambig, last_ambig;
+  Univcoord_T *ambcoords;
+  Chrpos_T genomepos;
+  int k, querystart, queryend, seglength;
+
+  /* Outputs of Stage3_compute and Pair_gsnap_nsegments */
+  struct Pair_T *pairarray;
+  List_T pairs;
+  int sensedir;
+  int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
+    max_match_length, ambig_end_length_5, ambig_end_length_3,
+    unknowns, mismatches, qopens, qindels, topens, tindels,
+    ncanonical, nsemicanonical, nnoncanonical;
+  double ambig_prob_5, ambig_prob_3, min_splice_prob;
+  Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+  Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+  Univcoord_T start, end;
+  int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
+  char *gsequence_orig, *gsequence_alt;
+
+
+  debug13(printf("Entered Stage3hr_substrings_run_gmap_plus\n"));
+
+  /* Scratch buffers for the genomic sequence of one segment */
+#ifdef HAVE_ALLOCA
+  gsequence_orig = (char *) MALLOCA((querylength+1) * sizeof(char));
+  gsequence_alt = (char *) MALLOCA((querylength+1) * sizeof(char));
+#else
+  gsequence_orig = (char *) MALLOC((querylength+1) * sizeof(char));
+  gsequence_alt = (char *) MALLOC((querylength+1) * sizeof(char));
+#endif
+
+  /* An ambiguous first substring is handled separately and skipped in the main list */
+  first_ambig = (Substring_T) NULL;
+  startp = this->substrings_1toN;
+  if (Substring_ambiguous_p((Substring_T) List_head(startp)) == true) {
+    head_substring = (Substring_T) List_head(startp);
+    first_ambig = head_substring;
+    startp = List_next(startp);
+  }
+
+  last_ambig = (Substring_T) NULL;
+  if (Substring_ambiguous_p((Substring_T) List_head(this->substrings_Nto1)) == true) {
+    tail_substring = (Substring_T) List_head(this->substrings_Nto1);
+    last_ambig = tail_substring;
+  }
+
+
+  /* D. Make all_stage2_starts (paths) */
+  if (first_ambig != NULL) {
+    debug13(printf("Handling first ambig\n"));
+    querystart = Substring_querystart(first_ambig);
+    queryend = Substring_queryend(first_ambig);
+    seglength = queryend - querystart;
+    ambcoords = Substring_ambcoords(first_ambig);
+
+    k = 0;
+    while (k < Substring_nambcoords(first_ambig)) {
+      debug13(printf("START, PLUS\n"));
+      /* Segment ends at the ambiguous coordinate.  querystart should be 0 */
+      genomepos = (ambcoords[k] - seglength) - this->chroffset;
+      Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*left*/ambcoords[k] - seglength,
+				     seglength,this->chrhigh,/*revcomp*/false);
+      path = push_plus_segment_pairs((List_T) NULL,pairpool,queryuc_ptr,gsequence_orig,gsequence_alt,
+				     querystart,genomepos,seglength);
+      debug13(Pair_dump_list(path,true));
+      all_stage2_starts = List_push(all_stage2_starts,(void *) path);
+      k++;
+    }
+  }
+
+
+  /* E. Make all_stage2_ends (pairs) */
+  if (last_ambig != NULL) {
+    debug13(printf("Handling last ambig\n"));
+    querystart = Substring_querystart(last_ambig);
+    queryend = Substring_queryend(last_ambig);
+    seglength = queryend - querystart;
+    ambcoords = Substring_ambcoords(last_ambig);
+
+    k = 0;
+    while (k < Substring_nambcoords(last_ambig)) {
+      debug13(printf("END, PLUS\n"));
+      /* Segment begins at the ambiguous coordinate */
+      genomepos = ambcoords[k] - this->chroffset;
+      Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/ambcoords[k],
+				      seglength,this->chrhigh,/*revcomp*/false);
+      path = push_plus_segment_pairs((List_T) NULL,pairpool,queryuc_ptr,gsequence_orig,gsequence_alt,
+				     querystart,genomepos,seglength);
+      debug13(Pair_dump_list(path,true));
+      all_stage2_ends = List_push(all_stage2_ends,(void *) List_reverse(path));
+      k++;
+    }
+  }
+
+  /* F. Make stage2pairs */
+  stage2pairs = (List_T) NULL;
+  for (p = startp; p != NULL; p = List_next(p)) {
+    substring = (Substring_T) List_head(p);
+    if (Substring_ambiguous_p(substring) == true) {
+      /* Skip */
+      continue;
+    }
+
+    debug13(printf("Handling substring for %d..%d, %u..%u, chrlength %u\n",
+		   Substring_querystart(substring),Substring_queryend(substring),
+		   Substring_alignstart_trim_chr(substring),Substring_alignend_trim_chr(substring),
+		   this->chrlength));
+    querystart = Substring_querystart(substring);
+    seglength = Substring_queryend(substring) - querystart;
+
+    genomepos = Substring_alignstart_trim_chr(substring);
+    Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/Substring_alignstart_trim(substring),
+				    seglength,this->chrhigh,/*revcomp*/false);
+    stage2pairs = push_plus_segment_pairs(stage2pairs,pairpool,queryuc_ptr,gsequence_orig,gsequence_alt,
+					  querystart,genomepos,seglength);
+    debug13(Pair_dump_list(stage2pairs,true));
+    debug13(printf("\n"));
+  }
+
+  /* The list was built by pushing, so its head is the last pair pushed;
+     after reversal the head is the first pair pushed */
+  knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
+  stage2pairs = List_reverse(stage2pairs);
+  knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
+
+  pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
+			     &matches,&nmatches_posttrim,&max_match_length,
+			     &ambig_end_length_5,&ambig_end_length_3,
+			     &ambig_splicetype_5,&ambig_splicetype_3,
+			     &ambig_prob_5,&ambig_prob_3,
+			     &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
+			     &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
+			     stage2pairs,all_stage2_starts,all_stage2_ends,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+			     cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+			     watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+			     /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+			     /*query_subseq_offset*/0,
+#else
+			     /*query_subseq_offset*/0,
+#endif
+			     this->chrnum,this->chroffset,this->chrhigh,
+			     knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
+			     /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+			     /*sense_try*/0,/*sense_filter*/0,
+			     oligoindices_minor,diagpool,cellpool);
+
+  if (pairarray != NULL) {
+    nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+				     pairarray,npairs);
+    /* Extend the aligned span to cover the whole query, clamped to the chromosome */
+    start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray[0])),
+			     /*minusterm*/Pair_querypos(&(pairarray[0])),this->chroffset);
+    end = add_bounded(this->chroffset + Pair_genomepos(&(pairarray[npairs-1])),
+		      /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),this->chrhigh);
+
+    hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
+			     ambig_end_length_5,ambig_end_length_3,
+			     ambig_splicetype_5,ambig_splicetype_3,
+			     ambig_prob_5,ambig_prob_3,min_splice_prob,
+			     pairarray,npairs,nsegments,nintrons,nindelbreaks,
+			     /*left*/start,/*genomiclength*/end - start + 1,
+			     /*plusp*/true,genestrand,first_read_p,
+			     /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+			     cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS);
+    if (hit == NULL) {
+      /* No hit took ownership of the pair array */
+      FREE_OUT(pairarray);
+    }
+  }
+
+  List_free(&all_stage2_ends);
+  List_free(&all_stage2_starts);
+
+#ifdef HAVE_ALLOCA
+  FREEA(gsequence_alt);
+  FREEA(gsequence_orig);
+#else
+  FREE(gsequence_alt);
+  FREE(gsequence_orig);
+#endif
+
+  return hit;
+}
+
+
+/* Modified from run_gmap_minus in sarray-read.c */
+T
+Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
+ int genestrand, bool first_read_p,
+ int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
+ T hit;
+ List_T stage2pairs, all_stage2_starts, all_stage2_ends;
+ List_T p, startp;
+
+ int k, i;
+ int querystart, queryend;
+ Univcoord_T *ambcoords;
+
+ int sensedir;
+
+ struct Pair_T *pairarray;
+ List_T pairs;
+ Substring_T substring, first_ambig, last_ambig;
+ int querypos, seglength;
+ Chrpos_T genomepos;
+ char c, g, g_alt, comp;
+
+ int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
+ max_match_length, ambig_end_length_5, ambig_end_length_3,
+ unknowns, mismatches, qopens, qindels, topens, tindels,
+ ncanonical, nsemicanonical, nnoncanonical;
+ double ambig_prob_5, ambig_prob_3, min_splice_prob;
+ Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
+ Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+ Univcoord_T start, end;
+ int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
+
+ char *gsequence_orig, *gsequence_alt;
+
+ debug13(printf("Entered Stage3hr_substrings_run_gmap_minus\n"));
+
+#ifdef HAVE_ALLOCA
+ gsequence_orig = (char *) MALLOCA((querylength+1) * sizeof(char));
+ gsequence_alt = (char *) MALLOCA((querylength+1) * sizeof(char));
+#else
+ gsequence_orig = (char *) MALLOC((querylength+1) * sizeof(char));
+ gsequence_alt = (char *) MALLOC((querylength+1) * sizeof(char));
+#endif
+
+ startp = this->substrings_1toN;
+ if (Substring_ambiguous_p((Substring_T) List_head(startp)) == true) {
+ first_ambig = (Substring_T) List_head(startp);
+ startp = List_next(startp);
+ } else {
+ first_ambig = (Substring_T) NULL;
+ }
+
+ p = this->substrings_Nto1;
+ if (Substring_ambiguous_p((Substring_T) List_head(p)) == true) {
+ last_ambig = (Substring_T) List_head(p);
+ } else {
+ last_ambig = (Substring_T) NULL;
+ }
+
+
+ /* D. Make all_stage2_starts (paths) */
+ all_stage2_starts = (List_T) NULL;
+ if (first_ambig != NULL) {
+ debug13(printf("Handling first ambig\n"));
+ querystart = Substring_querystart(first_ambig);
+ queryend = Substring_queryend(first_ambig);
+ seglength = queryend - querystart;
+ ambcoords = Substring_ambcoords(first_ambig);
+
+ for (k = 0; k < Substring_nambcoords(first_ambig); k++) {
+ debug13(printf("START, MINUS\n"));
+ stage2pairs = (List_T) NULL;
+ querypos = querystart;
+ genomepos = (this->chrhigh + 1) - ambcoords[k] - seglength;
+ Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/ambcoords[k],
+ seglength,this->chrhigh,/*revcomp*/true);
+
+ for (i = 0; i < seglength; i++) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ querypos++;
+ genomepos++;
+ }
+ debug13(Pair_dump_list(stage2pairs,true));
+ all_stage2_starts = List_push(all_stage2_starts,(void *) stage2pairs);
+ }
+ }
+
+
+ /* E. Make all_stage2_ends (pairs) */
+ all_stage2_ends = (List_T) NULL;
+ if (last_ambig != NULL) {
+ debug13(printf("Handling last ambig\n"));
+ querystart = Substring_querystart(last_ambig);
+ queryend = Substring_queryend(last_ambig);
+ seglength = queryend - querystart;
+ ambcoords = Substring_ambcoords(last_ambig);
+
+ for (k = 0; k < Substring_nambcoords(last_ambig); k++) {
+ debug13(printf("END, MINUS\n"));
+ stage2pairs = (List_T) NULL;
+ querypos = querystart;
+ genomepos = (this->chrhigh + 1) - ambcoords[k];
+ Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*left*/ambcoords[k] - seglength,
+ seglength,this->chrhigh,/*revcomp*/true);
+
+ for (i = 0; i < seglength; i++) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ querypos++;
+ genomepos++;
+ }
+ debug13(Pair_dump_list(stage2pairs,true));
+ all_stage2_ends = List_push(all_stage2_ends,(void *) List_reverse(stage2pairs));
+ }
+ }
+
+ /* F. Make stage2pairs */
+ stage2pairs = (List_T) NULL;
+ for (p = startp; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_ambiguous_p(substring) == true) {
+ /* Skip */
+ } else {
+ debug13(printf("Handling substring for %d..%d, %u..%u, chrlength %u\n",
+ Substring_querystart(substring),Substring_queryend(substring),
+ Substring_alignstart_trim_chr(substring),Substring_alignend_trim_chr(substring),
+ this->chrlength));
+ querypos = Substring_querystart(substring);
+ seglength = Substring_queryend(substring) - querypos;
+
+ /* Don't understand why it isn't this->chrlength - 1, but it
+ looks like the minus coordinates for substrings are +1 higher
+ than they should be */
+ genomepos = (this->chrlength + 1) - Substring_alignstart_trim_chr(substring);
+ Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/Substring_alignend_trim(substring),
+ seglength,this->chrhigh,/*revcomp*/true);
+
+ for (i = 0; i < seglength; i++) {
+ c = queryuc_ptr[querypos];
+ g = gsequence_orig[i];
+ g_alt = gsequence_alt[i];
+ if (g == c || g_alt == c) {
+ comp = MATCH_COMP;
+ } else {
+ comp = MISMATCH_COMP;
+ }
+ debug13(printf("Pushing %c %c %c at %d,%u\n",c,comp,g,querypos,genomepos));
+ stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
+ /*cdna*/c,comp,/*genome*/g,/*genomealt*/g_alt,
+ /*dynprogindex*/0);
+ querypos++;
+ genomepos++;
+ }
+ debug13(Pair_dump_list(stage2pairs,true));
+ debug13(printf("\n"));
+ }
+ }
+
+
+ knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
+ stage2pairs = List_reverse(stage2pairs);
+ knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + this->chroffset;
+
+
+ if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
+ &matches,&nmatches_posttrim,&max_match_length,
+ &ambig_end_length_5,&ambig_end_length_3,
+ &ambig_splicetype_5,&ambig_splicetype_3,
+ &ambig_prob_5,&ambig_prob_3,
+ &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
+ &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
+ stage2pairs,all_stage2_starts,all_stage2_ends,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+ watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+ /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+ /*query_subseq_offset*/0,
+#else
+ /*query_subseq_offset*/0,
+#endif
+ this->chrnum,this->chroffset,this->chrhigh,
+ knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
+ /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+ /*sense_try*/0,/*sense_filter*/0,
+ oligoindices_minor,diagpool,cellpool)) == NULL) {
+ hit = (T) NULL;
+
+ } else {
+ nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+ pairarray,npairs);
+ start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray[0])),
+ /*plusterm*/Pair_querypos(&(pairarray[0])),this->chrhigh);
+ end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray[npairs-1])),
+ /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),this->chroffset);
+ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
+ ambig_end_length_5,ambig_end_length_3,
+ ambig_splicetype_5,ambig_splicetype_3,
+ ambig_prob_5,ambig_prob_3,min_splice_prob,
+ pairarray,npairs,nsegments,nintrons,nindelbreaks,
+ /*left*/end,/*genomiclength*/start - end + 1,
+ /*plusp*/false,genestrand,first_read_p,
+ /*accession*/NULL,querylength,this->chrnum,this->chroffset,this->chrhigh,this->chrlength,
+ cdna_direction,sensedir,/*gmap_source*/GMAP_VIA_SUBSTRINGS)) == NULL) {
+ FREE_OUT(pairarray);
+ }
+ }
+
+ List_free(&all_stage2_ends);
+ List_free(&all_stage2_starts);
+
+#ifdef HAVE_ALLOCA
+ FREEA(gsequence_alt);
+ FREEA(gsequence_orig);
+#else
+ FREE(gsequence_alt);
+ FREE(gsequence_orig);
+#endif
+
+ return hit;
+}
+
+
+
+
+T
+Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Compress_T query_compress,
+ bool plusp, int genestrand, bool first_read_p,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ Chrpos_T chrlength, bool sarrayp) {
+ T new;
+ Substring_T substring;
+ Univcoord_T genomicstart, genomicend;
+ bool exactp = true;
+ int outofbounds_start = 0, outofbounds_end = 0;
+
+ if (plusp == true) {
+ genomicstart = left;
+ genomicend = left + genomiclength;
+ if (genomicstart < chroffset && genomicend > chrhigh) {
+ /* Out of bounds on both sides */
+ return (T) NULL;
+
+ } else if (genomicstart < chroffset) {
+ outofbounds_start = chroffset - genomicstart;
+ outofbounds_end = genomicend - chroffset;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_start = 0;
+ } else {
+ /* Consider low part to be out of bounds and stay in this chromosome */
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_end = 0;
+ }
+ exactp = false;
+
+ } else if (genomicend > chrhigh) {
+ outofbounds_start = chrhigh - genomicstart;
+ outofbounds_end = genomicend - chrhigh;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_start = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ outofbounds_end = 0;
+ }
+ exactp = false;
+ }
+
+ } else {
+ genomicend = left;
+ genomicstart = left + genomiclength;
+
+ if (genomicend < chroffset && genomicstart > chrhigh) {
+ /* Out of bounds on both ends */
+ return (T) NULL;
+
+ } else if (genomicend < chroffset) {
+ outofbounds_end = chroffset - genomicend;
+ outofbounds_start = genomicstart - chroffset;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_end = 0;
+ } else {
+ /* Consider low part to be out of bounds and stay in this chromosome */
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_start = 0;
+ }
+ exactp = false;
+
+ } else if (genomicstart > chrhigh) {
+ outofbounds_end = chrhigh - genomicend;
+ outofbounds_start = genomicstart - chrhigh;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_end = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ outofbounds_start = 0;
+ }
+ exactp = false;
+ }
+ }
+
+ if ((substring = Substring_new(/*nmismatches*/0,chrnum,chroffset,chrhigh,chrlength,
+ query_compress,/*start_endtype*/END,/*end_endtype*/END,
+ /*querystart*/0,/*queryend*/genomiclength,/*querylength*/genomiclength,
+ /*alignstart*/genomicstart,/*alignend*/genomicend,
+ genomiclength,exactp,plusp,genestrand,first_read_p,/*trim_left_p*/false,/*trim_right_p*/false,
+ outofbounds_start,outofbounds_end,/*minlength*/0)) == NULL) {
+ return (T) NULL;
+
+ } else {
+ new = (T) MALLOC_OUT(sizeof(*new));
+ debug0(printf("Stage3end_new_exact %p: left %llu, chrnum %d, sarrayp %d\n",new,(unsigned long long) left,chrnum,sarrayp));
+
+ new->substrings_LtoH = List_push(NULL,(void *) substring);
+ new->substrings_1toN = List_push(NULL,(void *) substring);
+ new->substrings_Nto1 = List_push(NULL,(void *) substring);
+
+ new->junctions_LtoH = (List_T) NULL;
+ new->junctions_1toN = (List_T) NULL;
+ new->junctions_Nto1 = (List_T) NULL;
+
+ new->pairarray = (struct Pair_T *) NULL;
+ new->cigar_tokens = (List_T) NULL;
+ new->gmap_intronp = false;
+
+ new->querylength_adj = new->querylength = genomiclength;
+ new->genomicstart = genomicstart;
+ new->genomicend = genomicend;
+
+ if (genomicstart < genomicend) {
+ new->low = genomicstart;
+ new->high = genomicend;
+ } else {
+ new->low = genomicend;
+ new->high = genomicstart;
+ }
+ new->genomiclength = new->high - new->low;
+ new->guided_insertlength = 0U;
+ debug0(printf("Assigned %llu to low and %llu to high\n",(unsigned long long) new->low,(unsigned long long) new->high));
+
+
+ if (exactp == true) {
+ new->hittype = EXACT;
+ } else {
+ new->hittype = SUB;
+ }
+ new->genestrand = genestrand;
+ new->sarrayp = sarrayp;
+ new->gmap_source = GMAP_NOT_APPLICABLE;
+ new->improved_by_gmap_p = false;
+
+ new->chrnum = new->effective_chrnum = chrnum;
+ new->other_chrnum = 0;
+ new->chroffset = chroffset;
+ new->chrhigh = chrhigh;
+ new->chrlength = chrlength;
+ new->plusp = plusp;
+ new->sensedir = SENSE_NULL;
+
+#if 0
+ new->mapq_loglik = Substring_mapq_loglik(substring);
+ new->mapq_score = 0;
new->absmq_score = 0;
#endif
new->nindels = 0;
- new->indel_pos = 0;
- new->indel_low = 0;
new->nmismatches_whole = 0;
new->nmismatches_bothdiff = 0;
/* new->nmismatches_refdiff = 0; */
@@ -4904,29 +5784,13 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
new->trim_left_splicep = false;
new->trim_right_splicep = false;
- new->penalties = 0;
+ /* new->penalties = 0; */
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
*found_score = 0;
- new->start_amb_length = new->end_amb_length = 0;
- new->start_amb_prob = new->end_amb_prob = 0.0;
- new->amb_length_donor = new->amb_length_acceptor = 0;
-
- new->start_ambiguous_p = new->end_ambiguous_p = false;
- new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
- new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
- new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
- new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
- new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
- new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
- new->start_amb_probs = new->end_amb_probs = (double *) NULL;
- new->amb_probs_donor = new->amb_probs_acceptor = (double *) NULL;
- new->start_nambcoords = new->end_nambcoords = 0;
- new->nambcoords_donor = new->nambcoords_acceptor = 0;
- new->nchimera_known = 0;
- new->nchimera_novel = 0;
+ new->nsplices = 0;
new->distance = 0U;
new->shortexonA_distance = new->shortexonD_distance = 0U;
@@ -4951,46 +5815,110 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
T new;
Substring_T substring;
Univcoord_T genomicstart, genomicend;
+ int outofbounds_start = 0, outofbounds_end = 0;
+
+ debug0(printf("Entered Stage3end_new_substitution at left %u and chrhigh %u, sarrayp %d\n",left,chrhigh,sarrayp));
if (plusp == true) {
- if ((genomicend = left + genomiclength) > chrhigh) {
+ genomicstart = left;
+ genomicend = left + genomiclength;
+ if (genomicstart < chroffset && genomicend > chrhigh) {
+ /* Out of bounds on both sides */
return (T) NULL;
- } else {
- genomicstart = left;
+
+ } else if (genomicstart < chroffset) {
+ outofbounds_start = chroffset - genomicstart;
+ outofbounds_end = genomicend - chroffset;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_start = 0;
+ } else {
+ /* Consider low part to be out of bounds and stay in this chromosome */
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_end = 0;
+ }
+
+ } else if (genomicend > chrhigh) {
+ outofbounds_start = chrhigh - genomicstart;
+ outofbounds_end = genomicend - chrhigh;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_start = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ outofbounds_end = 0;
+ }
}
+
} else {
- if ((genomicstart = left + genomiclength) > chrhigh) {
+ genomicend = left;
+ genomicstart = left + genomiclength;
+
+ if (genomicend < chroffset && genomicstart > chrhigh) {
+ /* Out of bounds on both ends */
return (T) NULL;
- } else {
- genomicend = left;
+
+ } else if (genomicend < chroffset) {
+ outofbounds_end = chroffset - genomicend;
+ outofbounds_start = genomicstart - chroffset;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_end = 0;
+ } else {
+ /* Consider low part to be out of bounds and stay in this chromosome */
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_start = 0;
+ }
+
+ } else if (genomicstart > chrhigh) {
+ outofbounds_end = chrhigh - genomicend;
+ outofbounds_start = genomicstart - chrhigh;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_end = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ outofbounds_start = 0;
+ }
}
}
- if ((substring = Substring_new(nmismatches_whole,chrnum,chroffset,chrhigh,chrlength,left,
- genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ if ((substring = Substring_new(nmismatches_whole,chrnum,chroffset,chrhigh,chrlength,
query_compress,/*start_endtype*/END,/*end_endtype*/END,
/*querystart*/0,/*queryend*/genomiclength,/*querylength*/genomiclength,
/*alignstart*/genomicstart,/*alignend*/genomicend,
- genomiclength,/*extraleft*/0,/*extraright*/0,/*exactp*/false,
- plusp,genestrand,first_read_p,/*trim_left_p*/true,/*trim_right_p*/true,
- /*minlength*/genomiclength/2)) == NULL) {
+ genomiclength,/*exactp*/false,plusp,genestrand,first_read_p,/*trim_left_p*/true,/*trim_right_p*/true,
+ outofbounds_start,outofbounds_end,/*minlength*/genomiclength/2)) == NULL) {
+ debug0(printf("Returning NULL\n"));
return (T) NULL;
} else {
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_substitution %p: left %llu, chrnum %d, nmismatches %d\n",
- new,(unsigned long long) left,chrnum,nmismatches_whole));
+ debug0(printf("Stage3end_new_substitution %p: left %llu, chrnum %d, nmismatches %d, sarrayp %d\n",
+ new,(unsigned long long) left,chrnum,nmismatches_whole,sarrayp));
- new->substring1 = substring;
- new->substring2 = (Substring_T) NULL;
- new->substring0 = (Substring_T) NULL;
- new->substring_donor = new->substring_acceptor = (Substring_T) NULL;
- new->substringD = new->substringA = (Substring_T) NULL;
- new->substring_LtoH = List_push(NULL,(void *) new->substring1);
+ new->substrings_LtoH = List_push(NULL,(void *) substring);
+ new->substrings_1toN = List_push(NULL,(void *) substring);
+ new->substrings_Nto1 = List_push(NULL,(void *) substring);
+
+ new->junctions_LtoH = (List_T) NULL;
+ new->junctions_1toN = (List_T) NULL;
+ new->junctions_Nto1 = (List_T) NULL;
new->pairarray = (struct Pair_T *) NULL;
+ new->cigar_tokens = (List_T) NULL;
+ new->gmap_intronp = false;
- new->deletion = (char *) NULL;
new->querylength_adj = new->querylength = genomiclength;
new->genomicstart = genomicstart;
new->genomicend = genomicend;
@@ -5013,6 +5941,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
}
new->genestrand = genestrand;
new->sarrayp = sarrayp;
+ new->gmap_source = GMAP_NOT_APPLICABLE;
new->improved_by_gmap_p = false;
new->chrnum = new->effective_chrnum = chrnum;
@@ -5021,7 +5950,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
new->chrhigh = chrhigh;
new->chrlength = chrlength;
new->plusp = plusp;
- new->sensedir = new->sensedir_nonamb = SENSE_NULL;
+ new->sensedir = SENSE_NULL;
#if 0
new->mapq_loglik = Substring_mapq_loglik(substring);
@@ -5030,13 +5959,11 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
#endif
new->nindels = 0;
- new->indel_pos = 0;
- new->indel_low = 0;
new->nmismatches_whole = nmismatches_whole;
new->ntscore = nmismatches_whole;
new->score = nmismatches_whole;
- new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(new->substring1);
+ new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(substring);
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(new->substring1); */
#if 0
@@ -5044,8 +5971,8 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
new->nmatches = Substring_match_length(new->substring1) - new->total_nmismatches;
#else
/* This method is now correct for SNP-tolerant alignment */
- new->nmatches = Substring_nmatches(new->substring1);
- new->nmatches_posttrim = Substring_nmatches_posttrim(new->substring1);
+ new->nmatches = Substring_nmatches(substring);
+ new->nmatches_posttrim = Substring_nmatches_posttrim(substring);
#endif
new->trim_left = Substring_trim_left(substring);
@@ -5053,7 +5980,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
new->trim_left_splicep = Substring_trim_left_splicep(substring);
new->trim_right_splicep = Substring_trim_right_splicep(substring);
- new->penalties = 0;
+ /* new->penalties = 0; */
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
@@ -5062,24 +5989,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
*found_score = new->score;
}
- new->start_amb_length = new->end_amb_length = 0;
- new->start_amb_prob = new->end_amb_prob = 0.0;
- new->amb_length_donor = new->amb_length_acceptor = 0;
-
- new->start_ambiguous_p = new->end_ambiguous_p = false;
- new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
- new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
- new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
- new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
- new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
- new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
- new->start_amb_probs = new->end_amb_probs = (double *) NULL;
- new->amb_probs_donor = new->amb_probs_acceptor = (double *) NULL;
-
- new->start_nambcoords = new->end_nambcoords = 0;
- new->nambcoords_donor = new->nambcoords_acceptor = 0;
- new->nchimera_known = 0;
- new->nchimera_novel = 0;
+ new->nsplices = 0;
new->distance = 0U;
new->shortexonA_distance = new->shortexonD_distance = 0U;
@@ -5090,6 +6000,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
new->circularpos = compute_circularpos(&new->alias,new);
+ debug(printf("Returning substitution %p\n",new));
return new;
}
}
@@ -5104,15 +6015,18 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
int indel_penalty, bool sarrayp) {
T new;
Substring_T substring1, substring2;
+ Junction_T junction;
int querystart1, queryend1, querystart2, queryend2;
- Univcoord_T genomicstart, genomicend;
+ Univcoord_T genomicstart1, genomicend1, genomicstart2, genomicend2;
Univcoord_T genomicstart_adj_2, genomicend_adj_2;
Univcoord_T alignstart1, alignend1, alignstart2, alignend2;
+ int outofbounds_start = 0, outofbounds_end = 0;
+
debug2(printf("Entered with left %llu, querylength %d, genomiclength %d, indel_pos %d\n",
(unsigned long long) left,querylength,genomiclength,indel_pos));
- debug2(printf("q: %s\n",query));
#if 0
+ debug2(printf("q: %s\n",query));
debug2(printf("g: %s\n",genomicseg));
#endif
@@ -5124,89 +6038,179 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
queryend2 = querylength;
if (plusp == true) {
- genomicstart = left;
- genomicend = left + genomiclength;
-
- genomicstart_adj_2 = genomicstart - nindels;
- genomicend_adj_2 = genomicend - nindels;
+ alignstart1 = left /*+ querystart1 (0)*/;
+ alignend1 = alignstart2 = left + /*queryend1*/indel_pos;
+ alignend2 = (left - nindels) + /*queryend2*/querylength;
- alignstart1 = genomicstart;
- alignend1 = alignstart2 = genomicstart + indel_pos;
- alignend2 = genomicend/* - nindels*/;
+ genomicstart1 = alignstart1;
+ genomicend1 = alignend1;
+ genomicstart2 = alignstart2;
+ genomicend2 = alignend2;
- if (genomicend > chrhigh) {
+ if (genomicstart1 < chroffset && genomicend2 > chrhigh) {
+ /* Out of bounds on both sides */
return (T) NULL;
- }
- } else {
- genomicend = left;
- genomicstart = left + genomiclength;
+ } else if (genomicstart1 < chroffset) {
+ outofbounds_start = chroffset - genomicstart1;
+ outofbounds_end = genomicend2 - chroffset;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ if (querylength - indel_pos - nindels < outofbounds_end) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ outofbounds_start = 0;
+ } else {
+ /* Consider low part to be out of bounds and stay in this chromosome */
+ if (indel_pos < outofbounds_start) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_end = 0;
+ }
+
+ } else if (genomicend2 > chrhigh) {
+ outofbounds_start = chrhigh - genomicstart1;
+ outofbounds_end = genomicend2 - chrhigh;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ if (querylength - indel_pos - nindels < outofbounds_end) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ outofbounds_start = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ if (indel_pos < outofbounds_start) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ outofbounds_end = 0;
+ }
+ }
- genomicstart_adj_2 = genomicstart + nindels;
- genomicend_adj_2 = genomicend + nindels;
+ } else {
+ alignstart1 = (left - nindels) + (querylength /*- querystart (0)*/);
+ alignend1 = alignstart2 = (left - nindels) + (querylength - indel_pos);
+ alignend2 = left /* + (querylength - queryend)*/;
- alignstart1 = genomicstart;
- alignend1 = alignstart2 = genomicstart - indel_pos;
- alignend2 = genomicend/* + nindels*/;
+ genomicstart1 = alignstart1;
+ genomicend1 = alignend1;
+ genomicstart2 = alignstart2;
+ genomicend2 = alignend2;
- if (genomicstart > chrhigh) {
+ if (genomicend2 < chroffset && genomicstart1 > chrhigh) {
+ /* Out of bounds on both sides */
return (T) NULL;
+
+ } else if (genomicend2 < chroffset) {
+ outofbounds_end = chroffset - genomicend2;
+ outofbounds_start = genomicstart1 - chroffset;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ if (indel_pos < outofbounds_start) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ outofbounds_end = 0;
+ } else {
+ /* Consider low part to be out of bounds and stay in this chromosome */
+ if (querylength - indel_pos - nindels < outofbounds_end) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_start = 0;
+ }
+
+ } else if (genomicstart1 > chrhigh) {
+ outofbounds_end = chrhigh - genomicend2;
+ outofbounds_start = genomicstart1 - chrhigh;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ if (indel_pos < outofbounds_start) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ outofbounds_end = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ if (querylength - indel_pos - nindels < outofbounds_end) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ outofbounds_start = 0;
+ }
}
}
- if ((substring1 = Substring_new(nmismatches1_whole,chrnum,chroffset,chrhigh,chrlength,left,
- genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ if ((substring1 = Substring_new(nmismatches1_whole,chrnum,chroffset,chrhigh,chrlength,
query_compress,/*start_endtype*/END,/*end_endtype*/INS,
querystart1,queryend1,querylength,alignstart1,alignend1,genomiclength,
- /*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
+ /*exactp*/false,plusp,genestrand,first_read_p,
/*trim_left_p (previously was end1_indel_p ? false : true)*/true,
- /*trim_right_p*/false,/*minlength*/0)) == NULL) {
+ /*trim_right_p*/false,outofbounds_start,/*outofbounds_end*/0,/*minlength*/0)) == NULL) {
return (T) NULL;
- } else if ((substring2 = Substring_new(nmismatches2_whole,chrnum,chroffset,chrhigh,chrlength,left,
- genomicstart,genomicend,genomicstart_adj_2,genomicend_adj_2,
+ } else if ((substring2 = Substring_new(nmismatches2_whole,chrnum,chroffset,chrhigh,chrlength,
query_compress,/*start_endtype*/INS,/*end_endtype*/END,
querystart2,queryend2,querylength,alignstart2,alignend2,genomiclength,
- /*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
- /*trim_left_p*/false,
- /*trim_right_p (previously was end2_indel_p ? false : true)*/true,
- /*minlength*/0)) == NULL) {
+ /*exactp*/false,plusp,genestrand,first_read_p,
+ /*trim_left_p*/false,/*trim_right_p (previously was end2_indel_p ? false : true)*/true,
+ /*outofbounds_start*/0,outofbounds_end,/*minlength*/0)) == NULL) {
Substring_free(&substring1);
return (T) NULL;
} else {
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_insertion %p: left %llu, chrnum %d, nmismatches %d+%d, indel_pos %d, nindels %d\n",
- new,(unsigned long long) left,chrnum,nmismatches1_whole,nmismatches2_whole,indel_pos,nindels));
+ debug0(printf("Stage3end_new_insertion %p: left %llu, chrnum %d, nmismatches %d+%d, indel_pos %d, nindels %d, sarrayp %d\n",
+ new,(unsigned long long) left,chrnum,nmismatches1_whole,nmismatches2_whole,indel_pos,nindels,sarrayp));
+
+ new->substrings_1toN = List_push(NULL,substring2);
+ new->substrings_1toN = List_push(new->substrings_1toN,substring1);
- new->substring1 = substring1;
- new->substring2 = substring2;
- new->substring0 = (Substring_T) NULL;
- new->substring_donor = new->substring_acceptor = (Substring_T) NULL;
- new->substringD = new->substringA = (Substring_T) NULL;
+ new->substrings_Nto1 = List_push(NULL,substring1);
+ new->substrings_Nto1 = List_push(new->substrings_Nto1,substring2);
- new->indel_pos = indel_pos;
if (plusp == true) {
- new->substring_LtoH = List_push(List_push(NULL,new->substring2),new->substring1);
- new->indel_low = indel_pos;
+ new->substrings_LtoH = List_push(NULL,substring2);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,substring1);
} else {
- new->substring_LtoH = List_push(List_push(NULL,new->substring1),new->substring2);
- new->indel_low = querylength - indel_pos;
+ new->substrings_LtoH = List_push(NULL,substring1);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,substring2);
}
+ junction = Junction_new_insertion(nindels);
+ new->junctions_LtoH = List_push(NULL,junction);
+ new->junctions_1toN = List_push(NULL,junction);
+ new->junctions_Nto1 = List_push(NULL,junction);
new->pairarray = (struct Pair_T *) NULL;
+ new->cigar_tokens = (List_T) NULL;
+ new->gmap_intronp = false;
- new->deletion = (char *) NULL;
- new->querylength_adj = new->querylength = querylength /* - nindels */;
- new->genomicstart = genomicstart;
- new->genomicend = genomicend;
+ new->querylength = querylength;
+ new->querylength_adj = querylength - nindels;
+ new->genomicstart = genomicstart1;
+ new->genomicend = genomicend2;
- if (genomicstart < genomicend) {
- new->low = genomicstart;
- new->high = genomicend;
+ if (genomicstart1 < genomicend2) {
+ new->low = genomicstart1;
+ new->high = genomicend2;
} else {
- new->low = genomicend;
- new->high = genomicstart;
+ new->low = genomicend2;
+ new->high = genomicstart1;
}
new->genomiclength = new->high - new->low;
new->guided_insertlength = 0U;
@@ -5214,6 +6218,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
new->hittype = INSERTION;
new->genestrand = genestrand;
new->sarrayp = sarrayp;
+ new->gmap_source = GMAP_NOT_APPLICABLE;
new->improved_by_gmap_p = false;
new->chrnum = new->effective_chrnum = chrnum;
@@ -5222,7 +6227,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
new->chrhigh = chrhigh;
new->chrlength = chrlength;
new->plusp = plusp;
- new->sensedir = new->sensedir_nonamb = SENSE_NULL;
+ new->sensedir = SENSE_NULL;
#if 0
new->mapq_loglik = Substring_mapq_loglik(substring1) + Substring_mapq_loglik(substring2) +
@@ -5236,16 +6241,16 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
new->ntscore = indel_penalty + nmismatches1_whole + nmismatches2_whole;
new->score = new->ntscore;
- new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(new->substring1) + Substring_nmismatches_bothdiff(new->substring2);
+ new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(substring1) + Substring_nmismatches_bothdiff(substring2);
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(new->substring1) + Substring_nmismatches_refdiff(new->substring2); */
#if 0
/* This method is correct for SNP-tolerant alignment */
- new->nmatches = Substring_match_length(new->substring1) + Substring_match_length(new->substring2) - new->total_nmismatches;
+ new->nmatches = Substring_match_length(substring1) + Substring_match_length(substring2) - new->total_nmismatches;
#else
/* This method is now correct for SNP-tolerant alignment */
- new->nmatches = Substring_nmatches(new->substring1) + Substring_nmatches(new->substring2);
- new->nmatches_posttrim = Substring_nmatches_posttrim(new->substring1) + Substring_nmatches_posttrim(new->substring2);
+ new->nmatches = Substring_nmatches(substring1) + Substring_nmatches(substring2);
+ new->nmatches_posttrim = Substring_nmatches_posttrim(substring1) + Substring_nmatches_posttrim(substring2);
new->nmatches_posttrim += nindels; /* for use in goodness_cmp procedures */
new->nmatches_posttrim -= indel_penalty; /* for use in goodness_cmp procedures */
#endif
@@ -5255,12 +6260,14 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
new->trim_left_splicep = Substring_trim_left_splicep(substring1);
new->trim_right_splicep = Substring_trim_right_splicep(substring2);
+#if 0
#ifdef SCORE_INDELS
/* indel_penalty will be counted later */
new->penalties = 0;
#else
new->penalties = indel_penalty;
#endif
+#endif
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
@@ -5269,24 +6276,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
*found_score = new->score;
}
- new->start_amb_length = new->end_amb_length = 0;
- new->start_amb_prob = new->end_amb_prob = 0.0;
- new->amb_length_donor = new->amb_length_acceptor = 0;
-
- new->start_ambiguous_p = new->end_ambiguous_p = false;
- new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
- new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
- new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
- new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
- new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
- new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
- new->start_amb_probs = new->end_amb_probs = (double *) NULL;
- new->amb_probs_donor = new->amb_probs_acceptor = (double *) NULL;
-
- new->start_nambcoords = new->end_nambcoords = 0;
- new->nambcoords_donor = new->nambcoords_acceptor = 0;
- new->nchimera_known = 0;
- new->nchimera_novel = 0;
+ new->nsplices = 0;
new->distance = 0U;
new->shortexonA_distance = new->shortexonD_distance = 0U;
@@ -5310,10 +6300,13 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
int indel_penalty, bool sarrayp) {
T new;
Substring_T substring1, substring2;
+ Junction_T junction;
int querystart1, queryend1, querystart2, queryend2;
- Univcoord_T genomicstart, genomicend;
+ Univcoord_T genomicstart1, genomicend1, genomicstart2, genomicend2;
Univcoord_T genomicstart_adj_2, genomicend_adj_2;
Univcoord_T alignstart1, alignend1, alignstart2, alignend2;
+ int outofbounds_start = 0, outofbounds_end = 0;
+
debug3(printf("Entered with left %llu, querylength %d, genomiclength %d, indel_pos %d\n",
(unsigned long long) left,querylength,genomiclength,indel_pos));
@@ -5326,120 +6319,206 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
querystart1 = 0;
queryend1 = indel_pos;
- querystart2 = indel_pos; /* Do not add nindels */
+ querystart2 = indel_pos; /* Do not add nindels */
queryend2 = querylength;
- if (plusp == true) {
- genomicstart = left;
- genomicend = left + genomiclength;
- genomicstart_adj_2 = genomicstart + nindels;
- genomicend_adj_2 = genomicend + nindels;
+ if (plusp == true) {
+ alignstart1 = left /*+ querystart1 (0)*/;
+ alignend1 = left + indel_pos;
+ alignstart2 = (left + nindels) + indel_pos;
+ alignend2 = (left + nindels) + querylength;
- alignstart1 = genomicstart;
- alignend1 = genomicstart + indel_pos;
- alignstart2 = alignend1 + nindels;
- alignend2 = genomicend/* + nindels*/;
+ genomicstart1 = alignstart1;
+ genomicend1 = alignend1;
+ genomicstart2 = alignstart2;
+ genomicend2 = alignend2;
- debug3(printf("plusp is true. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu, left2 %llu\n",
+ debug3(printf("plusp is true. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu\n",
(unsigned long long) genomicstart,(unsigned long long) genomicend,
(unsigned long long) alignstart1,(unsigned long long) alignend1,(unsigned long long) alignstart2,
- (unsigned long long) alignend2,(unsigned long long) left,(unsigned long long) left2));
+ (unsigned long long) alignend2,(unsigned long long) left));
- if (genomicend > chrhigh) {
+
+ if (genomicstart1 < chroffset && genomicend2 > chrhigh) {
+ /* Out of bounds on both sides */
return (T) NULL;
+
+ } else if (genomicstart1 < chroffset) {
+ outofbounds_start = chroffset - genomicstart1;
+ outofbounds_end = genomicend2 - chroffset;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ if (querylength - indel_pos - nindels < outofbounds_end) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ outofbounds_start = 0;
+ } else {
+ /* Consider low part to be out of bounds and stay in this chromosome */
+ if (indel_pos < outofbounds_start) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_end = 0;
+ }
+
+ } else if (genomicend2 > chrhigh) {
+ outofbounds_start = chrhigh - genomicstart1;
+ outofbounds_end = genomicend2 - chrhigh;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ if (querylength - indel_pos - nindels < outofbounds_end) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ outofbounds_start = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ if (indel_pos < outofbounds_start) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_end = 0;
+ }
}
-
- } else {
- genomicend = left;
- genomicstart = left + genomiclength;
- genomicstart_adj_2 = genomicstart - nindels;
- genomicend_adj_2 = genomicend - nindels;
+ } else {
+ alignstart1 = left + (querylength /*- querystart (0)*/);
+ alignend1 = left + (querylength - indel_pos);
+ alignstart2 = (left - nindels) + (querylength - indel_pos);
+ alignend2 = (left - nindels) /*+ querylength - queryend (querylength)*/;
- alignstart1 = genomicstart;
- alignend1 = genomicstart - indel_pos;
- alignstart2 = alignend1 - nindels;
- alignend2 = genomicend/* - nindels*/;
+ genomicstart1 = alignstart1;
+ genomicend1 = alignend1;
+ genomicstart2 = alignstart2;
+ genomicend2 = alignend2;
- debug3(printf("plusp is false. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu, left2 %llu\n",
+ debug3(printf("plusp is false. genomicstart %llu, genomicend %llu, alignstart1 %llu, alignend1 %llu, alignstart2 %llu, alignend2 %llu, left1 %llu\n",
(unsigned long long) genomicstart,(unsigned long long) genomicend,
(unsigned long long) alignstart1,(unsigned long long) alignend1,(unsigned long long) alignstart2,
- (unsigned long long) alignend2,(unsigned long long) left,(unsigned long long) left2));
+ (unsigned long long) alignend2,(unsigned long long) left));
- if (genomicstart > chrhigh) {
+ if (genomicend2 < chroffset && genomicstart1 > chrhigh) {
+ /* Out of bounds on both sides */
return (T) NULL;
+
+ } else if (genomicend2 < chroffset) {
+ outofbounds_end = chroffset - genomicend2;
+ outofbounds_start = genomicstart1 - chroffset;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ if (indel_pos < outofbounds_start) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ outofbounds_end = 0;
+ } else {
+ /* Consider low part to be out of bounds. Stay in this chromosome */
+ if (querylength - indel_pos - nindels < outofbounds_end) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_start = 0;
+ }
+
+ } else if (genomicstart1 > chrhigh) {
+ outofbounds_end = chrhigh - genomicend2;
+ outofbounds_start = genomicstart1 - chrhigh;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ if (indel_pos < outofbounds_start) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ outofbounds_end = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ if (querylength - indel_pos - nindels < outofbounds_end) {
+ /* indel is in eliminated part, so abort */
+ return (T) NULL;
+ }
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ outofbounds_start = 0;
+ }
}
}
- if ((substring1 = Substring_new(nmismatches1_whole,chrnum,chroffset,chrhigh,chrlength,left,
- genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ if ((substring1 = Substring_new(nmismatches1_whole,chrnum,chroffset,chrhigh,chrlength,
query_compress,/*start_endtype*/END,/*end_endtype*/DEL,
querystart1,queryend1,querylength,alignstart1,alignend1,genomiclength,
- /*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
+ /*exactp*/false,plusp,genestrand,first_read_p,
/*trim_left_p (previously was end1_indel_p ? false : true)*/true,
- /*trim_right_p*/false,/*minlength*/0)) == NULL) {
+ /*trim_right_p*/false,outofbounds_start,/*outofbounds_end*/0,/*minlength*/0)) == NULL) {
return (T) NULL;
- } else if ((substring2 = Substring_new(nmismatches2_whole,chrnum,chroffset,chrhigh,chrlength,left,
- genomicstart,genomicend,genomicstart_adj_2,genomicend_adj_2,
+ } else if ((substring2 = Substring_new(nmismatches2_whole,chrnum,chroffset,chrhigh,chrlength,
query_compress,/*start_endtype*/DEL,/*end_endtype*/END,
querystart2,queryend2,querylength,alignstart2,alignend2,genomiclength,
- /*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
- /*trim_left_p*/false,
- /*trim_right_p (previously was end2_indel_p ? false : true) */true,
- /*minlength*/0)) == NULL) {
+ /*exactp*/false,plusp,genestrand,first_read_p,
+ /*trim_left_p*/false,/*trim_right_p (previously was end2_indel_p ? false : true) */true,
+ /*outofbounds_start*/0,outofbounds_end,/*minlength*/0)) == NULL) {
Substring_free(&substring1);
return (T) NULL;
-
+
} else {
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_deletion %p: left %llu, chrnum %d, nmismatches %d+%d, indel_pos %d, nindels %d\n",
- new,(unsigned long long) left,chrnum,nmismatches1_whole,nmismatches2_whole,indel_pos,nindels));
-
- new->substring1 = substring1;
- new->substring2 = substring2;
- new->substring0 = (Substring_T) NULL;
- new->substring_donor = new->substring_acceptor = (Substring_T) NULL;
- new->substringD = new->substringA = (Substring_T) NULL;
+ debug0(printf("Stage3end_new_deletion %p: left %llu, chrnum %d, nmismatches %d+%d, indel_pos %d, nindels %d, sarrayp %d\n",
+ new,(unsigned long long) left,chrnum,nmismatches1_whole,nmismatches2_whole,indel_pos,nindels,sarrayp));
new->pairarray = (struct Pair_T *) NULL;
+ new->cigar_tokens = (List_T) NULL;
+ new->gmap_intronp = false;
-#if 0
- new->deletion = (char *) CALLOC_OUT(nindels+1,sizeof(char));
+ /* Deletion contents are always from plus genomic strand */
if (plusp == true) {
- strncpy(new->deletion,&(genomicseg[indel_pos]),nindels);
- new->substring_low = new->substring1;
- new->substring_high = new->substring2;
+ junction = Junction_new_deletion(nindels,/*deletionpos*/left + indel_pos);
} else {
- make_complement_buffered(new->deletion,&(genomicseg[querylength-indel_pos]),nindels);
- new->substring_low = new->substring2;
- new->substring_high = new->substring1;
+ junction = Junction_new_deletion(nindels,/*deletionpos*/left + (querylength - indel_pos));
}
-#else
+ new->junctions_LtoH = List_push(NULL,junction);
+ new->junctions_1toN = List_push(NULL,junction);
+ new->junctions_Nto1 = List_push(NULL,junction);
+
+
/* Initialize so Substring_free will not try to free */
- new->deletion = (char *) NULL;
- new->indel_pos = indel_pos;
+ /* Filled in by Stage3end_display_prep */
+ new->substrings_1toN = List_push(NULL,substring2);
+ new->substrings_1toN = List_push(new->substrings_1toN,substring1);
+
+ new->substrings_Nto1 = List_push(NULL,substring1);
+ new->substrings_Nto1 = List_push(new->substrings_Nto1,substring2);
+
if (plusp == true) {
- new->substring_LtoH = List_push(List_push(NULL,new->substring2),new->substring1);
- new->indel_low = indel_pos;
+ new->substrings_LtoH = List_push(NULL,substring2);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,substring1);
} else {
- new->substring_LtoH = List_push(List_push(NULL,new->substring1),new->substring2);
- new->indel_low = querylength - indel_pos;
+ new->substrings_LtoH = List_push(NULL,substring1);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,substring2);
}
-#endif
new->querylength = querylength;
new->querylength_adj = querylength + nindels;
- new->genomicstart = genomicstart;
- new->genomicend = genomicend;
+ new->genomicstart = genomicstart1;
+ new->genomicend = genomicend2;
- if (genomicstart < genomicend) {
- new->low = genomicstart;
- new->high = genomicend;
+ if (genomicstart1 < genomicend2) {
+ new->low = genomicstart1;
+ new->high = genomicend2;
} else {
- new->low = genomicend;
- new->high = genomicstart;
+ new->low = genomicend2;
+ new->high = genomicstart1;
}
new->genomiclength = new->high - new->low;
new->guided_insertlength = 0U;
@@ -5447,6 +6526,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
new->hittype = DELETION;
new->genestrand = genestrand;
new->sarrayp = sarrayp;
+ new->gmap_source = GMAP_NOT_APPLICABLE;
new->improved_by_gmap_p = false;
new->chrnum = new->effective_chrnum = chrnum;
@@ -5455,7 +6535,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
new->chrhigh = chrhigh;
new->chrlength = chrlength;
new->plusp = plusp;
- new->sensedir = new->sensedir_nonamb = SENSE_NULL;
+ new->sensedir = SENSE_NULL;
#if 0
new->mapq_loglik = Substring_mapq_loglik(substring1) + Substring_mapq_loglik(substring2);
@@ -5468,16 +6548,16 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
new->ntscore = indel_penalty + nmismatches1_whole + nmismatches2_whole;
new->score = new->ntscore;
- new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(new->substring1) + Substring_nmismatches_bothdiff(new->substring2);
- /* new->nmismatches_refdiff = Substring_nmismatches_refdiff(new->substring1) + Substring_nmismatches_refdiff(new->substring2); */
+ new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(substring1) + Substring_nmismatches_bothdiff(substring2);
+ /* new->nmismatches_refdiff = Substring_nmismatches_refdiff(substring1) + Substring_nmismatches_refdiff(substring2); */
#if 0
/* This method is correct for SNP-tolerant alignment */
- new->nmatches = Substring_match_length(new->substring1) + Substring_match_length(new->substring2) - new->total_nmismatches;
+ new->nmatches = Substring_match_length(substring1) + Substring_match_length(substring2) - new->total_nmismatches;
#else
/* This method is now correct for SNP-tolerant alignment */
- new->nmatches = Substring_nmatches(new->substring1) + Substring_nmatches(new->substring2);
- new->nmatches_posttrim = Substring_nmatches_posttrim(new->substring1) + Substring_nmatches_posttrim(new->substring2);
+ new->nmatches = Substring_nmatches(substring1) + Substring_nmatches(substring2);
+ new->nmatches_posttrim = Substring_nmatches_posttrim(substring1) + Substring_nmatches_posttrim(substring2);
new->nmatches_posttrim -= indel_penalty; /* for use in goodness_cmp procedures */
#endif
@@ -5486,12 +6566,14 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
new->trim_left_splicep = Substring_trim_left_splicep(substring1);
new->trim_right_splicep = Substring_trim_right_splicep(substring2);
+#if 0
#ifdef SCORE_INDELS
/* indel_penalty will be counted later */
new->penalties = 0;
#else
new->penalties = indel_penalty;
#endif
+#endif
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
@@ -5500,24 +6582,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
*found_score = new->score;
}
- new->start_amb_length = new->end_amb_length = 0;
- new->start_amb_prob = new->end_amb_prob = 0.0;
- new->amb_length_donor = new->amb_length_acceptor = 0;
-
- new->start_ambiguous_p = new->end_ambiguous_p = false;
- new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
- new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
- new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
- new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
- new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
- new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
- new->start_amb_probs = new->end_amb_probs = (double *) NULL;
- new->amb_probs_donor = new->amb_probs_acceptor = (double *) NULL;
-
- new->start_nambcoords = new->end_nambcoords = 0;
- new->nambcoords_donor = new->nambcoords_acceptor = 0;
- new->nchimera_known = 0;
- new->nchimera_novel = 0;
+ new->nsplices = 0;
new->distance = 0U;
new->shortexonA_distance = new->shortexonD_distance = 0U;
@@ -5534,9 +6599,11 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
/* Never returns NULL */
+/* Previously new->substring1 was donor and new->substring2 was acceptor */
T
Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_acceptor,
- Substring_T donor, Substring_T acceptor, Chrpos_T distance,
+ Substring_T donor, Substring_T acceptor,
+ double donor_prob, double acceptor_prob, Chrpos_T distance,
bool shortdistancep, int splicing_penalty, int querylength, int amb_length, double amb_prob,
#ifdef LARGE_GENOMES
Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
@@ -5549,61 +6616,65 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
bool copy_donor_p, bool copy_acceptor_p, bool first_read_p, int sensedir,
bool sarrayp) {
T new;
- int ignore;
Substring_T substring_for_concordance; /* always the inner substring */
+ Substring_T substring_other; /* the outer substring */
+ Substring_T substring;
+ Junction_T junction;
#ifdef DEBUG0
int i;
#endif
-
+
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_splice %p with sensedir %d, donor substring %p and acceptor substring %p, and amb_length %d\n",
- new,sensedir,donor,acceptor,amb_length));
+ debug0(printf("Stage3end_new_splice %p with sensedir %d, donor substring %p and acceptor substring %p, and amb_length %d, sarrayp %d\n",
+ new,sensedir,donor,acceptor,amb_length,sarrayp));
+
#if 0
assert(Substring_match_length_orig(donor) + Substring_match_length_orig(acceptor) + amb_length == querylength);
#endif
- new->deletion = (char *) NULL;
new->querylength_adj = new->querylength = querylength;
+#if 0
if (donor == NULL) {
- new->substring1 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
- new->substring2 = (Substring_T) NULL;
+ /* new->substring1 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor; */
+ /* new->substring2 = (Substring_T) NULL; */
new->substring_donor = (Substring_T) NULL;
new->substring_acceptor = new->substring1;
-
+
} else if (acceptor == NULL) {
- new->substring1 = copy_donor_p ? Substring_copy(donor) : donor;
- new->substring2 = (Substring_T) NULL;
+ /* new->substring1 = copy_donor_p ? Substring_copy(donor) : donor; */
+ /* new->substring2 = (Substring_T) NULL; */
new->substring_donor = new->substring1;
new->substring_acceptor = (Substring_T) NULL;
} else {
- if (sensedir == SENSE_FORWARD) {
- new->substring1 = copy_donor_p ? Substring_copy(donor) : donor;
- new->substring2 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
+ if (sensedir != SENSE_ANTI) {
+ /* SENSE_FORWARD or SENSE_NULL */
+ /* new->substring1 = copy_donor_p ? Substring_copy(donor) : donor; */
+ /* new->substring2 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor; */
new->substring_donor = new->substring1;
new->substring_acceptor = new->substring2;
- } else if (sensedir == SENSE_ANTI) {
- new->substring1 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
- new->substring2 = copy_donor_p ? Substring_copy(donor) : donor;
+ } else {
+ /* new->substring1 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor; */
+ /* new->substring2 = copy_donor_p ? Substring_copy(donor) : donor; */
new->substring_donor = new->substring2;
new->substring_acceptor = new->substring1;
- } else {
- abort();
}
}
- new->substring0 = (Substring_T) NULL;
- new->substringD = new->substringA = (Substring_T) NULL;
+#endif
+
new->nindels = 0;
- new->indel_pos = 0;
- new->indel_low = 0;
new->pairarray = (struct Pair_T *) NULL;
+ new->cigar_tokens = (List_T) NULL;
+ new->gmap_intronp = false;
new->sarrayp = sarrayp;
+ new->gmap_source = GMAP_NOT_APPLICABLE;
new->improved_by_gmap_p = false;
+
if (donor == NULL) {
- new->hittype = HALFSPLICE_ACCEPTOR;
+ new->hittype = SPLICE;
new->genestrand = Substring_genestrand(acceptor);
new->chrnum = Substring_chrnum(acceptor);
new->chroffset = Substring_chroffset(acceptor);
@@ -5612,7 +6683,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->plusp = Substring_plusp(acceptor);
} else if (acceptor == NULL) {
- new->hittype = HALFSPLICE_DONOR;
+ new->hittype = SPLICE;
new->genestrand = Substring_genestrand(donor);
new->chrnum = Substring_chrnum(donor);
new->chroffset = Substring_chroffset(donor);
@@ -5649,7 +6720,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
}
#else
assert(Substring_plusp(donor) == Substring_plusp(acceptor));
- assert(Substring_chimera_sensep(donor) == Substring_chimera_sensep(acceptor));
+ assert(Substring_chimera_sensedir(donor) == Substring_chimera_sensedir(acceptor));
new->plusp = Substring_plusp(donor);
#endif
@@ -5658,6 +6729,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
} else if (merge_samechr_p == false) {
new->hittype = DISTANT_SPLICE;
new->sarrayp = sarrayp;
+ new->gmap_source = GMAP_NOT_APPLICABLE;
new->improved_by_gmap_p = false;
new->chrnum = 0;
new->chroffset = 0;
@@ -5667,6 +6739,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
} else {
new->sarrayp = sarrayp;
+ new->gmap_source = GMAP_NOT_APPLICABLE;
new->improved_by_gmap_p = false;
if (Substring_chrnum(donor) == Substring_chrnum(acceptor)) {
new->hittype = SAMECHR_SPLICE;
@@ -5685,7 +6758,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->chrhigh = 0;
new->chrlength = 0;
}
-
+
/* new->plusp assigned below */
#if 0
@@ -5702,138 +6775,81 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
/* printf("Making splice with shortdistancep = %d, donor chrnum %d, and acceptor chrnum %d => chrnum %d\n",
shortdistancep,Substring_chrnum(donor),Substring_chrnum(acceptor),new->chrnum); */
-#ifdef LARGE_GENOMES
- new->ambcoords_donor = Uint8list_to_array_out(&new->nambcoords_donor,ambcoords_donor);
- new->ambcoords_acceptor = Uint8list_to_array_out(&new->nambcoords_acceptor,ambcoords_acceptor);
-#else
- new->ambcoords_donor = Uintlist_to_array_out(&new->nambcoords_donor,ambcoords_donor);
- new->ambcoords_acceptor = Uintlist_to_array_out(&new->nambcoords_acceptor,ambcoords_acceptor);
-#endif
-
- new->amb_knowni_donor = Intlist_to_array_out(&ignore,amb_knowni_donor);
- new->amb_knowni_acceptor = Intlist_to_array_out(&ignore,amb_knowni_acceptor);
- new->amb_nmismatches_donor = Intlist_to_array_out(&ignore,amb_nmismatches_donor);
- new->amb_nmismatches_acceptor = Intlist_to_array_out(&ignore,amb_nmismatches_acceptor);
- new->amb_probs_donor = Doublelist_to_array_out(&ignore,amb_probs_donor);
- new->amb_probs_acceptor = Doublelist_to_array_out(&ignore,amb_probs_acceptor);
+ donor = copy_donor_p ? Substring_copy(donor) : donor;
+ acceptor = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
-
- if (sensedir == SENSE_FORWARD) {
+ if (sensedir != SENSE_ANTI) {
+ /* SENSE_FORWARD or SENSE_NULL */
if (donor == NULL) {
new->genomicstart = Substring_genomicstart(acceptor);
new->genomicend = Substring_genomicend(acceptor);
-
- new->start_ambiguous_p = true;
- new->start_amb_length = amb_length;
- new->start_amb_prob = amb_prob;
- new->start_ambcoords = new->ambcoords_donor;
- new->start_nambcoords = new->nambcoords_donor;
- new->start_amb_knowni = new->amb_knowni_donor;
- new->start_amb_nmismatches = new->amb_nmismatches_donor;
- new->start_amb_probs = new->amb_probs_donor;
-
- new->end_ambiguous_p = false;
- new->end_amb_length = 0;
- new->end_amb_prob = 0.0;
- new->end_ambcoords = NULL;
- new->end_nambcoords = 0;
- new->end_amb_knowni = NULL;
- new->end_amb_nmismatches = NULL;
- new->end_amb_probs = NULL;
+
+ donor = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(acceptor),
+ /*splice_pos*/Substring_querystart(acceptor),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ /*genomiclength*/querylength,new->plusp,new->genestrand,first_read_p,
+ ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
+ /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false,
+ /*substring1p*/true);
+ debug0(printf("Making sense ambiguous donor at %d..%d with %d matches\n",
+ 0,Substring_querystart(acceptor),Substring_nmatches(donor)));
+ donor_prob = Doublelist_max(amb_probs_donor);
} else if (acceptor == NULL) {
new->genomicstart = Substring_genomicstart(donor);
new->genomicend = Substring_genomicend(donor);
- new->end_ambiguous_p = true;
- new->end_amb_length = amb_length;
- new->end_amb_prob = amb_prob;
- new->end_ambcoords = new->ambcoords_acceptor;
- new->end_nambcoords = new->nambcoords_acceptor;
- new->end_amb_knowni = new->amb_knowni_acceptor;
- new->end_amb_nmismatches = new->amb_nmismatches_acceptor;
- new->end_amb_probs = new->amb_probs_acceptor;
-
- new->start_ambiguous_p = false;
- new->start_amb_length = 0;
- new->start_amb_prob = 0.0;
- new->start_ambcoords = NULL;
- new->start_nambcoords = 0;
- new->start_amb_knowni = NULL;
- new->start_amb_nmismatches = NULL;
- new->start_amb_probs = NULL;
+ acceptor = Substring_new_ambig(/*querystart*/Substring_queryend(donor),/*queryend*/querylength,
+ /*splice_pos*/Substring_queryend(donor),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ /*genomiclength*/querylength,new->plusp,new->genestrand,first_read_p,
+ ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
+ /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true,
+ /*substring1p*/false);
+ debug0(printf("Making sense ambiguous donor at %d..%d with %d matches\n",
+ Substring_queryend(donor),querylength,Substring_nmatches(acceptor)));
+ acceptor_prob = Doublelist_max(amb_probs_acceptor);
} else {
new->genomicstart = Substring_genomicstart(donor);
new->genomicend = Substring_genomicend(acceptor);
-
- new->start_ambiguous_p = new->end_ambiguous_p = false;
- new->start_amb_length = new->end_amb_length = 0;
- new->start_amb_prob = new->end_amb_prob = 0.0;
- new->start_ambcoords = new->end_ambcoords = NULL;
- new->start_nambcoords = new->end_nambcoords = 0;
- new->start_amb_knowni = new->end_amb_knowni = NULL;
- new->start_amb_nmismatches = new->end_amb_nmismatches = NULL;
- new->start_amb_probs = new->end_amb_probs = NULL;
}
} else {
+ /* SENSE_ANTI */
if (donor == NULL) {
new->genomicstart = Substring_genomicstart(acceptor);
new->genomicend = Substring_genomicend(acceptor);
- new->end_ambiguous_p = true;
- new->end_amb_length = amb_length;
- new->end_amb_prob = amb_prob;
- new->end_ambcoords = new->ambcoords_donor;
- new->end_nambcoords = new->nambcoords_donor;
- new->end_amb_knowni = new->amb_knowni_donor;
- new->end_amb_nmismatches = new->amb_nmismatches_donor;
- new->end_amb_probs = new->amb_probs_donor;
-
- new->start_ambiguous_p = false;
- new->start_amb_length = 0;
- new->start_amb_prob = 0.0;
- new->start_ambcoords = NULL;
- new->start_nambcoords = 0;
- new->start_amb_knowni = NULL;
- new->start_amb_nmismatches = NULL;
- new->start_amb_probs = NULL;
+ donor = Substring_new_ambig(/*querystart*/Substring_queryend(acceptor),/*queryend*/querylength,
+ /*splice_pos*/Substring_queryend(acceptor),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ /*genomiclength*/querylength,new->plusp,new->genestrand,first_read_p,
+ ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
+ /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false,
+ /*substring1p*/false);
+ debug0(printf("Making antisense ambiguous donor at %d..%d with %d matches\n",
+ Substring_queryend(acceptor),querylength,Substring_nmatches(donor)));
+ donor_prob = Doublelist_max(amb_probs_donor);
} else if (acceptor == NULL) {
new->genomicstart = Substring_genomicstart(donor);
new->genomicend = Substring_genomicend(donor);
- new->start_ambiguous_p = true;
- new->start_amb_length = amb_length;
- new->start_amb_prob = amb_prob;
- new->start_ambcoords = new->ambcoords_acceptor;
- new->start_nambcoords = new->nambcoords_acceptor;
- new->start_amb_knowni = new->amb_knowni_acceptor;
- new->start_amb_nmismatches = new->amb_nmismatches_acceptor;
- new->start_amb_probs = new->amb_probs_acceptor;
-
- new->end_ambiguous_p = false;
- new->end_amb_length = 0;
- new->end_amb_prob = 0.0;
- new->end_ambcoords = NULL;
- new->end_nambcoords = 0;
- new->end_amb_knowni = NULL;
- new->end_amb_nmismatches = NULL;
- new->end_amb_probs = NULL;
+ acceptor = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(donor),
+ /*splice_pos*/Substring_querystart(donor),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ /*genomiclength*/querylength,new->plusp,new->genestrand,first_read_p,
+ ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
+ /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true,
+ /*substring1p*/true);
+ debug0(printf("Making antisense ambiguous acceptor at %d..%d with %d matches\n",
+ 0,Substring_querystart(donor),Substring_nmatches(acceptor)));
+ acceptor_prob = Doublelist_max(amb_probs_acceptor);
} else {
new->genomicstart = Substring_genomicstart(acceptor);
new->genomicend = Substring_genomicend(donor);
-
- new->start_amb_length = new->end_amb_length = 0;
- new->start_amb_prob = new->end_amb_prob = 0.0;
- new->start_ambiguous_p = new->end_ambiguous_p = false;
- new->start_ambcoords = new->end_ambcoords = NULL;
- new->start_nambcoords = new->end_nambcoords = 0;
- new->start_amb_knowni = new->end_amb_knowni = NULL;
- new->start_amb_nmismatches = new->end_amb_nmismatches = NULL;
- new->start_amb_probs = new->end_amb_probs = NULL;
}
}
@@ -5849,9 +6865,11 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
debug0(printf(" hittype is %s, plusp %d, genomicpos %u..%u\n",
hittype_string(new->hittype),new->plusp,new->genomicstart - new->chroffset,new->genomicend - new->chroffset));
+#if 0
debug0(printf("start_ambiguous_p %d (%d starts), end_ambiguous_p %d (%d ends)\n",
new->start_ambiguous_p,new->start_nambcoords,new->end_ambiguous_p,new->end_nambcoords));
-#ifdef DEBUG0
+#endif
+#if 0
for (i = 0; i < new->start_nambcoords; i++) {
printf("amb start %u\n",new->start_ambcoords[i]);
}
@@ -5859,92 +6877,52 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
printf("amb end %u\n",new->end_ambcoords[i]);
}
#endif
- debug0(printf("start_amb_length %d, end_amb_length %d\n",new->start_amb_length,new->end_amb_length));
-
-#ifdef CHECK_ASSERTIONS
- if (new->start_ambiguous_p == true && new->start_nambcoords == 0) {
- abort();
- }
- if (new->end_ambiguous_p == true && new->end_nambcoords == 0) {
- abort();
- }
-#endif
new->genomiclength = new->high - new->low;
new->guided_insertlength = 0U;
- new->nchimera_known = Substring_nchimera_known(donor) + Substring_nchimera_known(acceptor);
- new->nchimera_novel = Substring_nchimera_novel(donor) + Substring_nchimera_novel(acceptor);
-#if 0
- /* Adversely affects comparison based on nchimera_known */
- if (new->start_ambiguous_p == true && favor_ambiguous_p == true) {
- new->nchimera_known++;
- /* new->nchimera_novel--; */
- }
- if (new->end_ambiguous_p == true && favor_ambiguous_p == true) {
- new->nchimera_known++;
- /* new->nchimera_novel--; */
- }
-#endif
+ new->nsplices = 1;
if (new->chrnum == 0) {
/* Previously also did this for (donor != NULL && acceptor != NULL && shortdistancep == false), but this led to the wrong chrpos for SAM output */
/* Checking for merge_samechr_p leads to wrong mappingstart and mappingend for running GMAP */
/* Always want the original query end */
- if (first_read_p == true) {
- if (invert_first_p == false) {
- if (Substring_queryend(acceptor) > Substring_queryend(donor)) {
- substring_for_concordance = acceptor;
- new->substring_LtoH = List_push(NULL,(void *) new->substring_acceptor);
- new->effective_chrnum = Substring_chrnum(acceptor);
- new->other_chrnum = Substring_chrnum(donor);
- } else {
- substring_for_concordance = donor;
- new->substring_LtoH = List_push(NULL,(void *) new->substring_donor);
- new->effective_chrnum = Substring_chrnum(donor);
- new->other_chrnum = Substring_chrnum(acceptor);
- }
- } else {
- if (Substring_querystart(acceptor) < Substring_querystart(donor)) {
- substring_for_concordance = acceptor;
- new->substring_LtoH = List_push(NULL,(void *) new->substring_acceptor);
- new->effective_chrnum = Substring_chrnum(acceptor);
- new->other_chrnum = Substring_chrnum(donor);
- } else {
- substring_for_concordance = donor;
- new->substring_LtoH = List_push(NULL,(void *) new->substring_donor);
- new->effective_chrnum = Substring_chrnum(donor);
- new->other_chrnum = Substring_chrnum(acceptor);
- }
- }
+ junction = Junction_new_chimera(sensedir,donor_prob,acceptor_prob);
+ new->junctions_LtoH = List_push(NULL,junction);
+ new->junctions_1toN = List_push(NULL,junction);
+ new->junctions_Nto1 = List_push(NULL,junction);
+ /* For translocations, LtoH makes no sense, so we rely upon 1toN and force LtoH to be the same as 1toN */
+ debug0(printf("donor querypos %d..%d\n",Substring_querystart(donor),Substring_queryend(donor)));
+ debug0(printf("acceptor querypos %d..%d\n",Substring_querystart(acceptor),Substring_queryend(acceptor)));
+
+ if (Substring_querystart(donor) < Substring_querystart(acceptor)) {
+ new->sensedir = SENSE_FORWARD;
+ new->substrings_LtoH = List_push(NULL,(void *) acceptor);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) donor);
} else {
- if (invert_second_p == false) {
- if (Substring_queryend(acceptor) > Substring_queryend(donor)) {
- substring_for_concordance = acceptor;
- new->substring_LtoH = List_push(NULL,(void *) new->substring_acceptor);
- new->effective_chrnum = Substring_chrnum(acceptor);
- new->other_chrnum = Substring_chrnum(donor);
- } else {
- substring_for_concordance = donor;
- new->substring_LtoH = List_push(NULL,(void *) new->substring_donor);
- new->effective_chrnum = Substring_chrnum(donor);
- new->other_chrnum = Substring_chrnum(acceptor);
- }
- } else {
- if (Substring_querystart(acceptor) < Substring_querystart(donor)) {
- substring_for_concordance = acceptor;
- new->substring_LtoH = List_push(NULL,(void *) new->substring_acceptor);
- new->effective_chrnum = Substring_chrnum(acceptor);
- new->other_chrnum = Substring_chrnum(donor);
- } else {
- substring_for_concordance = donor;
- new->substring_LtoH = List_push(NULL,(void *) new->substring_donor);
- new->effective_chrnum = Substring_chrnum(donor);
- new->other_chrnum = Substring_chrnum(acceptor);
- }
- }
+ new->sensedir = SENSE_ANTI;
+ new->substrings_LtoH = List_push(NULL,(void *) donor);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) acceptor);
+ }
+ debug0(printf("sensedir %d\n",new->sensedir));
+
+
+ new->substrings_1toN = List_copy(new->substrings_LtoH);
+ new->substrings_Nto1 = List_reverse(List_copy(new->substrings_LtoH));
+ assert(Substring_querystart(List_head(new->substrings_1toN)) < Substring_querystart(List_head(new->substrings_Nto1)));
+
+
+ if (first_read_p == true) {
+ substring_for_concordance = (Substring_T) List_head(new->substrings_Nto1);
+ substring_other = (Substring_T) List_head(new->substrings_1toN);
+ } else {
+ substring_for_concordance = (Substring_T) List_head(new->substrings_1toN);
+ substring_other = (Substring_T) List_head(new->substrings_Nto1);
}
+ new->effective_chrnum = Substring_chrnum(substring_for_concordance);
+ new->other_chrnum = Substring_chrnum(substring_other);
+
/* Redefine based on inner substring */
new->genomicstart = Substring_genomicstart(substring_for_concordance);
@@ -5956,59 +6934,67 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->effective_chrnum = new->chrnum;
new->other_chrnum = 0;
- if (donor == NULL) {
- new->substring_LtoH = List_push(NULL,(void *) new->substring1);
- } else if (acceptor == NULL) {
- new->substring_LtoH = List_push(NULL,(void *) new->substring1);
- } else if (sensedir == SENSE_FORWARD) {
-#if 0
- if (new->plusp == true) {
- new->substring_low = new->substring_donor;
- new->substring_high = new->substring_acceptor;
- } else {
- new->substring_low = new->substring_acceptor;
- new->substring_high = new->substring_donor;
- }
-#else
+ new->substrings_LtoH = (List_T) NULL;
+ new->junctions_LtoH = (List_T) NULL;
+ new->sensedir = sensedir;
+
+ if (sensedir != SENSE_ANTI) {
+ /* SENSE_FORWARD or SENSE_NULL */
if (new->plusp == true) {
/* Order is donor, acceptor. Same as substring1, substring2, as expected */
- new->substring_LtoH = List_push(List_push(NULL,(void *) new->substring_acceptor),new->substring_donor);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) acceptor);
+ junction = Junction_new_splice(distance,sensedir,donor_prob,acceptor_prob);
+ new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) donor);
+
} else {
/* Order is acceptor, donor. Same as substring2, substring1, as expected */
- new->substring_LtoH = List_push(List_push(NULL,(void *) new->substring_donor),new->substring_acceptor);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) donor);
+ junction = Junction_new_splice(distance,sensedir,donor_prob,acceptor_prob);
+ new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) acceptor);
}
-#endif
- } else if (sensedir == SENSE_ANTI) {
-#if 0
- if (new->plusp == true) {
- new->substring_low = new->substring_acceptor;
- new->substring_high = new->substring_donor;
- } else {
- new->substring_low = new->substring_donor;
- new->substring_high = new->substring_acceptor;
- }
-#else
+ } else {
+ /* SENSE_ANTI */
if (new->plusp == true) {
/* Order is acceptor, donor. Same as substring1, substring2, as expected */
- new->substring_LtoH = List_push(List_push(NULL,(void *) new->substring_donor),new->substring_acceptor);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) donor);
+ junction = Junction_new_splice(distance,sensedir,donor_prob,acceptor_prob);
+ new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) acceptor);
} else {
/* Order is donor, acceptor. Same as substring2, substring1, as expected */
- new->substring_LtoH = List_push(List_push(NULL,(void *) new->substring_acceptor),new->substring_donor);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) acceptor);
+ junction = Junction_new_splice(distance,sensedir,donor_prob,acceptor_prob);
+ new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction);
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) donor);
}
-#endif
+ }
+ if (new->plusp == true) {
+ new->substrings_1toN = List_copy(new->substrings_LtoH);
+ new->substrings_Nto1 = List_reverse(List_copy(new->substrings_LtoH));
+ new->junctions_1toN = List_copy(new->junctions_LtoH);
+ new->junctions_Nto1 = List_reverse(List_copy(new->junctions_LtoH));
} else {
- abort();
+ new->substrings_1toN = List_reverse(List_copy(new->substrings_LtoH));
+ new->substrings_Nto1 = List_copy(new->substrings_LtoH);
+ new->junctions_1toN = List_reverse(List_copy(new->junctions_LtoH));
+ new->junctions_Nto1 = List_copy(new->junctions_LtoH);
}
}
+
new->nmismatches_whole = nmismatches_donor + nmismatches_acceptor;
new->score = new->ntscore = splicing_penalty + new->nmismatches_whole;
+#if 0
if (sensedir == SENSE_FORWARD) {
new->score += antistranded_penalty;
}
+#endif
+#if 0
if (donor == NULL) {
/* new->mapq_loglik = Substring_mapq_loglik(acceptor); */
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(acceptor) + nmismatches_donor;
@@ -6033,36 +7019,26 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->sensedir_nonamb = SENSE_NULL; /* Ignore sense based on ambiguous end */
debug0(printf("New splice has donor %d + amb %d matches, sensedir nonamb %d\n",
Substring_nmatches(donor),amb_length,new->sensedir_nonamb));
- } else {
- /* new->mapq_loglik = Substring_mapq_loglik(donor) + Substring_mapq_loglik(acceptor); */
- new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(donor) + Substring_nmismatches_bothdiff(acceptor);
- /* new->nmismatches_refdiff = Substring_nmismatches_refdiff(donor) + Substring_nmismatches_refdiff(acceptor); */
- new->nmatches = Substring_nmatches(donor) + Substring_nmatches(acceptor);
- new->nmatches_posttrim = Substring_nmatches_posttrim(donor) + Substring_nmatches_posttrim(acceptor);
- new->sensedir_nonamb = sensedir;
- debug0(printf("New splice has donor %d + acceptor %d matches, sensedir nonamb %d\n",
- Substring_nmatches(donor),Substring_nmatches(acceptor),new->sensedir_nonamb));
}
- new->sensedir = sensedir;
+#endif
- if (new->substring0 != NULL) {
- new->trim_left = Substring_trim_left(new->substring0);
- new->trim_left_splicep = Substring_trim_left_splicep(new->substring0);
- } else {
- new->trim_left = Substring_trim_left(new->substring1);
- new->trim_left_splicep = Substring_trim_left_splicep(new->substring1);
- }
+ /* new->mapq_loglik = Substring_mapq_loglik(donor) + Substring_mapq_loglik(acceptor); */
+ new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(donor) + Substring_nmismatches_bothdiff(acceptor);
+ /* new->nmismatches_refdiff = Substring_nmismatches_refdiff(donor) + Substring_nmismatches_refdiff(acceptor); */
+ new->nmatches = Substring_nmatches(donor) + Substring_nmatches(acceptor);
+ new->nmatches_posttrim = Substring_nmatches_posttrim(donor) + Substring_nmatches_posttrim(acceptor);
+ debug0(printf("New splice has donor %d + acceptor %d matches, sensedir %d\n",
+ Substring_nmatches(donor),Substring_nmatches(acceptor),new->sensedir));
- if (new->substring2 != NULL) {
- new->trim_right = Substring_trim_right(new->substring2);
- new->trim_right_splicep = Substring_trim_right_splicep(new->substring2);
- } else {
- new->trim_right = Substring_trim_right(new->substring1);
- new->trim_right_splicep = Substring_trim_right_splicep(new->substring1);
- }
-
- new->penalties = splicing_penalty;
+ substring = (Substring_T) List_head(new->substrings_1toN);
+ new->trim_left = Substring_trim_left(substring);
+ new->trim_left_splicep = Substring_trim_left_splicep(substring);
+ substring = (Substring_T) List_head(new->substrings_Nto1);
+ new->trim_right = Substring_trim_right(substring);
+ new->trim_right_splicep = Substring_trim_right_splicep(substring);
+
+ /* new->penalties = splicing_penalty; */
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
@@ -6085,23 +7061,24 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
new->circularpos = compute_circularpos(&new->alias,new);
- assert(new->substring1 != NULL);
-
debug0(printf("Returning new splice %p at genomic %u..%u, donor %p (%u => %u), acceptor %p (%u => %u)\n",
- new,new->genomicstart - new->chroffset,new->genomicend - new->chroffset,new->substring_donor,
- new->substring_donor == NULL ? 0 : Substring_left_genomicseg(new->substring_donor),
- new->substring_donor == NULL ? 0 : Substring_splicecoord(new->substring_donor),
- new->substring_acceptor,new->substring_acceptor == NULL ? 0 : Substring_left_genomicseg(new->substring_acceptor),
- new->substring_acceptor == NULL ? 0 : Substring_splicecoord(new->substring_acceptor)));
+ new,new->genomicstart - new->chroffset,new->genomicend - new->chroffset,donor,
+ donor == NULL ? 0 : Substring_left_genomicseg(donor),
+ donor == NULL ? 0 : Substring_splicecoord(donor),
+ acceptor,acceptor == NULL ? 0 : Substring_left_genomicseg(acceptor),
+ acceptor == NULL ? 0 : Substring_splicecoord(acceptor)));
+
+ debug0(printf("sensedir %d\n",new->sensedir));
return new;
}
/* Never returns NULL. Never copies substrings. Always shortdistance. */
-/* Donor ----(A distance)---- [A Shortexon D] ----(D distance)---- Acceptor */
+/* [E Donor D] ----(junction)---- [A Shortexon D] ----(junction)---- [A Acceptor E] */
T
Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T acceptor, Substring_T shortexon,
+ double donor_prob, double shortexonA_prob, double shortexonD_prob, double acceptor_prob,
int amb_length_donor, int amb_length_acceptor, double amb_prob_donor, double amb_prob_acceptor,
#ifdef LARGE_GENOMES
Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
@@ -6112,169 +7089,183 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
Intlist_T amb_nmismatches_donor, Intlist_T amb_nmismatches_acceptor,
Doublelist_T amb_probs_donor, Doublelist_T amb_probs_acceptor,
bool copy_donor_p, bool copy_acceptor_p, bool copy_shortexon_p,
- int splicing_penalty, int querylength, int sensedir, bool sarrayp) {
+ int splicing_penalty, int querylength, bool first_read_p, int sensedir, bool sarrayp) {
T new;
int ignore;
+ Substring_T substring, substring0, substring1, substring2;
+ Chrpos_T distance;
+ Junction_T junction0, junction2;
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_shortexon %p, amb_donor %d, amb_acceptor %d, sensedir %d\n",
- new,amb_length_donor,amb_length_acceptor,sensedir));
+ debug0(printf("Stage3end_new_shortexon %p, amb_donor %d, amb_acceptor %d, sensedir %d, sarrayp %d\n",
+ new,amb_length_donor,amb_length_acceptor,sensedir,sarrayp));
assert(Substring_match_length_orig(donor) + Substring_match_length_orig(shortexon) + Substring_match_length_orig(acceptor) +
amb_length_donor + amb_length_acceptor == querylength);
- new->deletion = (char *) NULL;
new->querylength_adj = new->querylength = querylength;
- new->genestrand = Substring_genestrand(shortexon);
new->sarrayp = sarrayp;
+ new->gmap_source = GMAP_NOT_APPLICABLE;
new->improved_by_gmap_p = false;
+
+#if 0
if (donor == NULL && acceptor == NULL) {
new->hittype = ONE_THIRD_SHORTEXON;
- new->substring1 = copy_shortexon_p ? Substring_copy(shortexon) : shortexon;
- new->substring2 = (Substring_T) NULL;
- new->substring0 = (Substring_T) NULL;
- new->substring_donor = new->substring_acceptor = (Substring_T) NULL;
- new->substringD = new->substringA = (Substring_T) NULL;
- new->sensedir_nonamb = SENSE_NULL; /* Ignore sensedir based on double ambiguous ends */
new->shortexonA_distance = 0;
new->shortexonD_distance = 0;
- } else {
- if (donor == NULL) {
- new->hittype = TWO_THIRDS_SHORTEXON;
- } else if (acceptor == NULL) {
- new->hittype = TWO_THIRDS_SHORTEXON;
- } else {
- new->hittype = SHORTEXON;
- }
-
- /* Compute distances */
- if (donor == NULL) {
- new->shortexonA_distance = 0;
- } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) {
- new->shortexonA_distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
- } else {
- new->shortexonA_distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon);
- }
-
- if (acceptor == NULL) {
- new->shortexonD_distance = 0;
- } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) {
- new->shortexonD_distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
- } else {
- new->shortexonD_distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon);
- }
- new->distance = new->shortexonA_distance + new->shortexonD_distance;
-
- new->substring1 = copy_shortexon_p ? Substring_copy(shortexon) : shortexon;
- if (sensedir == SENSE_FORWARD) {
- new->substringD = new->substring0 = copy_donor_p ? Substring_copy(donor) : donor;
- new->substringA = new->substring2 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
- } else if (sensedir == SENSE_ANTI) {
- new->substringA = new->substring0 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
- new->substringD = new->substring2 = copy_donor_p ? Substring_copy(donor) : donor;
- } else {
- abort();
- }
- new->substring_donor = new->substring_acceptor = (Substring_T) NULL;
- new->sensedir_nonamb = sensedir;
}
- new->sensedir = sensedir;
+#endif
#if 0
- Substring_assign_shortexon_prob(new->substring1);
- if (new->substringD != NULL) {
- Substring_assign_donor_prob(new->substringD);
- }
- if (new->substringA != NULL) {
- Substring_assign_acceptor_prob(new->substringA);
+ if (donor == NULL) {
+ new->hittype = TWO_THIRDS_SHORTEXON;
+ } else if (acceptor == NULL) {
+ new->hittype = TWO_THIRDS_SHORTEXON;
+ } else {
+ new->hittype = SHORTEXON;
}
+#else
+ new->hittype = SUBSTRINGS;
#endif
+
new->pairarray = (struct Pair_T *) NULL;
+ new->cigar_tokens = (List_T) NULL;
+ new->gmap_intronp = false;
new->nindels = 0;
- new->indel_pos = 0;
- new->indel_low = 0;
new->chrnum = Substring_chrnum(shortexon);
new->chroffset = Substring_chroffset(shortexon);
new->chrhigh = Substring_chrhigh(shortexon);
new->chrlength = Substring_chrlength(shortexon);
new->plusp = Substring_plusp(shortexon);
+ new->genestrand = Substring_genestrand(shortexon);
- /* printf("Making splice with shortdistancep = %d, donor chrnum %d, and acceptor chrnum %d => chrnum %d\n",
- shortdistancep,Substring_chrnum(donor),Substring_chrnum(acceptor),new->chrnum); */
-
-
- new->amb_length_donor = amb_length_donor;
- new->amb_length_acceptor = amb_length_acceptor;
- new->amb_prob_donor = amb_prob_donor;
- new->amb_prob_acceptor = amb_prob_acceptor;
-
-#ifdef LARGE_GENOMES
- new->ambcoords_donor = Uint8list_to_array_out(&new->nambcoords_donor,ambcoords_donor);
- new->ambcoords_acceptor = Uint8list_to_array_out(&new->nambcoords_acceptor,ambcoords_acceptor);
-#else
- new->ambcoords_donor = Uintlist_to_array_out(&new->nambcoords_donor,ambcoords_donor);
- new->ambcoords_acceptor = Uintlist_to_array_out(&new->nambcoords_acceptor,ambcoords_acceptor);
-#endif
- new->amb_knowni_donor = Intlist_to_array_out(&ignore,amb_knowni_donor);
- new->amb_knowni_acceptor = Intlist_to_array_out(&ignore,amb_knowni_acceptor);
- new->amb_nmismatches_donor = Intlist_to_array_out(&ignore,amb_nmismatches_donor);
- new->amb_nmismatches_acceptor = Intlist_to_array_out(&ignore,amb_nmismatches_acceptor);
- new->amb_probs_donor = Doublelist_to_array_out(&ignore,amb_probs_donor);
- new->amb_probs_acceptor = Doublelist_to_array_out(&ignore,amb_probs_acceptor);
+ /* Compute distances */
+ if (donor == NULL) {
+ new->shortexonA_distance = 0;
+ } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) {
+ new->shortexonA_distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
+ } else {
+ new->shortexonA_distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon);
+ }
+ if (acceptor == NULL) {
+ new->shortexonD_distance = 0;
+ } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) {
+ new->shortexonD_distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
+ } else {
+ new->shortexonD_distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon);
+ }
+ new->distance = new->shortexonA_distance + new->shortexonD_distance;
if (sensedir == SENSE_FORWARD) {
new->genomicstart = (donor != NULL ? Substring_genomicstart(donor) : Substring_genomicstart(shortexon));
new->genomicend = (acceptor != NULL ? Substring_genomicend(acceptor) : Substring_genomicend(shortexon));
- new->start_amb_length = new->amb_length_donor;
- new->start_amb_prob = new->amb_prob_donor;
- new->start_ambcoords = new->ambcoords_donor;
- new->start_nambcoords = new->nambcoords_donor;
- new->start_amb_knowni = new->amb_knowni_donor;
- new->start_amb_nmismatches = new->amb_nmismatches_donor;
- new->start_amb_probs = new->amb_probs_donor;
+ } else if (sensedir == SENSE_ANTI) {
+ new->genomicstart = (acceptor != NULL ? Substring_genomicstart(acceptor) : Substring_genomicstart(shortexon));
+ new->genomicend = (donor != NULL ? Substring_genomicend(donor) : Substring_genomicend(shortexon));
- new->end_amb_length = new->amb_length_acceptor;
- new->end_amb_prob = new->amb_prob_acceptor;
- new->end_ambcoords = new->ambcoords_acceptor;
- new->end_nambcoords = new->nambcoords_acceptor;
- new->end_amb_knowni = new->amb_knowni_acceptor;
- new->end_amb_nmismatches = new->amb_nmismatches_acceptor;
- new->end_amb_probs = new->amb_probs_acceptor;
+ } else {
+ abort();
+ }
- new->start_ambiguous_p = (ambcoords_donor != NULL) ? true : false;
- new->end_ambiguous_p = (ambcoords_acceptor != NULL) ? true : false;
+ substring1 = copy_shortexon_p ? Substring_copy(shortexon) : shortexon;
+ if (sensedir == SENSE_FORWARD) {
+ substring0 = copy_donor_p ? Substring_copy(donor) : donor;
+ if (donor == NULL) {
+ donor = substring0 = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(shortexon),
+ /*splice_pos*/Substring_querystart(shortexon),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ /*genomiclength*/querylength,new->plusp,new->genestrand,first_read_p,
+ ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
+ /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false,
+ /*substring1p*/true);
+ /* new->start_amb_prob = Doublelist_max(amb_probs_donor); */
+ /* new->start_amb_length = amb_length_donor; */
+ junction0 = Junction_new_splice(/*distance*/0,sensedir,Doublelist_max(amb_probs_donor),shortexonA_prob);
+ } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) {
+ distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
+ junction0 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob);
+ } else {
+ distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon);
+ junction0 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob);
+ }
- } else {
- new->genomicstart = (acceptor != NULL ? Substring_genomicstart(acceptor) : Substring_genomicstart(shortexon));
- new->genomicend = (donor != NULL ? Substring_genomicend(donor) : Substring_genomicend(shortexon));
+ substring2 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
+ if (acceptor == NULL) {
+ acceptor = substring2 = Substring_new_ambig(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength,
+ /*splice_pos*/Substring_queryend(shortexon),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ /*genomiclength*/querylength,new->plusp,new->genestrand,first_read_p,
+ ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
+ /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true,
+ /*substring1p*/false);
+ /* new->end_amb_prob = Doublelist_max(amb_probs_acceptor); */
+ /* new->end_amb_length = amb_length_acceptor; */
+ junction2 = Junction_new_splice(/*distance*/0,sensedir,shortexonD_prob,Doublelist_max(amb_probs_acceptor));
+ } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) {
+ distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
+ junction2 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob);
+ } else {
+ distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon);
+ junction2 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob);
+ }
- new->start_amb_length = new->amb_length_acceptor;
- new->start_amb_prob = new->amb_prob_acceptor;
- new->start_ambcoords = new->ambcoords_acceptor;
- new->start_nambcoords = new->nambcoords_acceptor;
- new->start_amb_knowni = new->amb_knowni_acceptor;
- new->start_amb_nmismatches = new->amb_nmismatches_acceptor;
- new->start_amb_probs = new->amb_probs_acceptor;
+ } else if (sensedir == SENSE_ANTI) {
+ substring0 = copy_acceptor_p ? Substring_copy(acceptor) : acceptor;
+ if (acceptor == NULL) {
+ acceptor = substring0 = Substring_new_ambig(/*querystart*/0,/*queryend*/Substring_querystart(shortexon),
+ /*splice_pos*/Substring_querystart(shortexon),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ /*genomiclength*/querylength,new->plusp,new->genestrand,first_read_p,
+ ambcoords_acceptor,amb_knowni_acceptor,amb_nmismatches_acceptor,amb_probs_acceptor,
+ /*amb_common_prob*/donor_prob,/*amb_donor_common_p*/true,
+ /*substring1p*/true);
+ /* new->start_amb_prob = Doublelist_max(amb_probs_acceptor); */
+ /* new->start_amb_length = amb_length_acceptor; */
+ junction0 = Junction_new_splice(/*distance*/0,sensedir,shortexonD_prob,Doublelist_max(amb_probs_acceptor));
+ } else if (Substring_splicecoord_D(shortexon) > Substring_splicecoord(acceptor)) {
+ distance = Substring_splicecoord_D(shortexon) - Substring_splicecoord(acceptor);
+ junction0 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob);
+ } else {
+ distance = Substring_splicecoord(acceptor) - Substring_splicecoord_D(shortexon);
+ junction0 = Junction_new_splice(distance,sensedir,shortexonD_prob,acceptor_prob);
+ }
- new->end_amb_length = new->amb_length_donor;
- new->end_amb_prob = new->amb_prob_donor;
- new->end_ambcoords = new->ambcoords_donor;
- new->end_nambcoords = new->nambcoords_donor;
- new->end_amb_knowni = new->amb_knowni_donor;
- new->end_amb_nmismatches = new->amb_nmismatches_donor;
- new->end_amb_probs = new->amb_probs_donor;
+ substring2 = copy_donor_p ? Substring_copy(donor) : donor;
+ if (donor == NULL) {
+ donor = substring2 = Substring_new_ambig(/*querystart*/Substring_queryend(shortexon),/*queryend*/querylength,
+ /*splice_pos*/Substring_queryend(shortexon),querylength,
+ new->chrnum,new->chroffset,new->chrhigh,new->chrlength,
+ /*genomiclength*/querylength,new->plusp,new->genestrand,first_read_p,
+ ambcoords_donor,amb_knowni_donor,amb_nmismatches_donor,amb_probs_donor,
+ /*amb_common_prob*/acceptor_prob,/*amb_donor_common_p*/false,
+ /*substring1p*/false);
+ /* new->end_amb_prob = Doublelist_max(amb_probs_donor); */
+ /* new->end_amb_length = amb_length_donor; */
+ junction2 = Junction_new_splice(/*distance*/0,sensedir,Doublelist_max(amb_probs_donor),shortexonA_prob);
+ } else if (Substring_splicecoord_A(shortexon) > Substring_splicecoord(donor)) {
+ distance = Substring_splicecoord_A(shortexon) - Substring_splicecoord(donor);
+ junction2 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob);
+ } else {
+ distance = Substring_splicecoord(donor) - Substring_splicecoord_A(shortexon);
+ junction2 = Junction_new_splice(distance,sensedir,donor_prob,shortexonA_prob);
+ }
- new->start_ambiguous_p = (ambcoords_acceptor != NULL) ? true : false;
- new->end_ambiguous_p = (ambcoords_donor != NULL) ? true : false;
+ } else {
+ abort();
}
+ new->sensedir = sensedir;
+
+ /* printf("Making splice with shortdistancep = %d, donor chrnum %d, and acceptor chrnum %d => chrnum %d\n",
+ shortdistancep,Substring_chrnum(donor),Substring_chrnum(acceptor),new->chrnum); */
+
if (new->genomicstart < new->genomicend) {
debug0(printf("plus %s\n",print_sense(sensedir)));
@@ -6289,25 +7280,12 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
debug0(printf(" hittype is %s, genomicpos %u..%u\n",
hittype_string(new->hittype),new->genomicstart - new->chroffset,new->genomicend - new->chroffset));
- debug0(printf("start_ambiguous_p %d, end_ambiguous_p %d\n",new->start_ambiguous_p,new->end_ambiguous_p));
- debug0(printf("start_amb_length %d, end_amb_length %d\n",new->start_amb_length,new->end_amb_length));
+ /* debug0(printf("start_ambiguous_p %d, end_ambiguous_p %d\n",new->start_ambiguous_p,new->end_ambiguous_p)); */
new->genomiclength = new->high - new->low;
new->guided_insertlength = 0U;
- new->nchimera_known = Substring_nchimera_known(shortexon) + Substring_nchimera_known(donor) + Substring_nchimera_known(acceptor);
- new->nchimera_novel = Substring_nchimera_novel(shortexon) + Substring_nchimera_novel(donor) + Substring_nchimera_novel(acceptor);
-#if 0
- /* Adversely affects comparison based on nchimera_known */
- if (new->start_ambiguous_p == true && favor_ambiguous_p == true) {
- new->nchimera_known++;
- /* new->nchimera_novel--; */
- }
- if (new->end_ambiguous_p == true && favor_ambiguous_p == true) {
- new->nchimera_known++;
- /* new->nchimera_novel--; */
- }
-#endif
+ new->nsplices = 2;
new->effective_chrnum = new->chrnum;
new->other_chrnum = 0;
@@ -6315,58 +7293,42 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
/* Currently not allowing translocations on shortexons */
/* substring_for_concordance = (Substring_T) NULL; */
-#if 0
- if (sensedir == SENSE_FORWARD) {
- if (new->plusp == true) {
- new->substring_low = (new->substringD != NULL ? new->substringD : new->substring1); /* donor */
- new->substring_high = (new->substringA != NULL ? new->substringA : new->substring1); /* acceptor */
- } else {
- new->substring_low = (new->substringA != NULL ? new->substringA : new->substring1); /* acceptor */
- new->substring_high = (new->substringD != NULL ? new->substringD : new->substring1); /* donor */
+ new->substrings_LtoH = (List_T) NULL;
+ new->junctions_LtoH = (List_T) NULL;
+ if (new->plusp == true) {
+ if (substring2 != NULL) {
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) substring2);
+ new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction2);
}
-
- } else if (sensedir == SENSE_ANTI) {
- if (new->plusp == true) {
- new->substring_low = (new->substringA != NULL ? new->substringA : new->substring1); /* acceptor */
- new->substring_high = (new->substringD != NULL ? new->substringD : new->substring1); /* donor */
- } else {
- new->substring_low = (new->substringD != NULL ? new->substringD : new->substring1); /* donor */
- new->substring_high = (new->substringA != NULL ? new->substringA : new->substring1); /* acceptor */
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) substring1);
+ if (substring0 != NULL) {
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) substring0);
+ new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction0);
}
} else {
- abort();
- }
-
-#elif 0
- if (new->plusp == true) {
- new->substring_low = (new->substring0 != NULL ? new->substring0 : new->substring1);
- new->substring_high = (new->substring2 != NULL ? new->substring2 : new->substring1);
- } else {
- new->substring_low = (new->substring2 != NULL ? new->substring2 : new->substring1);
- new->substring_high = (new->substring0 != NULL ? new->substring0 : new->substring1);
+ if (substring0 != NULL) {
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) substring0);
+ new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction0);
+ }
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) substring1);
+ if (substring2 != NULL) {
+ new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) substring2);
+ new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction2);
+ }
}
-#else
- new->substring_LtoH = (List_T) NULL;
if (new->plusp == true) {
- if (new->substring2 != NULL) {
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring2);
- }
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring1);
- if (new->substring0 != NULL) {
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring0);
- }
+ new->substrings_1toN = List_copy(new->substrings_LtoH);
+ new->substrings_Nto1 = List_reverse(List_copy(new->substrings_LtoH));
+ new->junctions_1toN = List_copy(new->junctions_LtoH);
+ new->junctions_Nto1 = List_reverse(List_copy(new->junctions_LtoH));
} else {
- if (new->substring0 != NULL) {
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring0);
- }
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring1);
- if (new->substring2 != NULL) {
- new->substring_LtoH = List_push(new->substring_LtoH,(void *) new->substring2);
- }
+ new->substrings_1toN = List_reverse(List_copy(new->substrings_LtoH));
+ new->substrings_Nto1 = List_copy(new->substrings_LtoH);
+ new->junctions_1toN = List_reverse(List_copy(new->junctions_LtoH));
+ new->junctions_Nto1 = List_copy(new->junctions_LtoH);
}
-#endif
#if 0
@@ -6376,26 +7338,20 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
#endif
new->nmismatches_whole = Substring_nmismatches_whole(shortexon);
- if (donor != NULL) {
- new->nmismatches_whole += Substring_nmismatches_whole(donor);
- }
- if (acceptor != NULL) {
- new->nmismatches_whole += Substring_nmismatches_whole(acceptor);
- }
- new->ntscore = splicing_penalty + splicing_penalty + new->nmismatches_whole;
+ new->nmismatches_whole += Substring_nmismatches_whole(donor);
+ new->nmismatches_whole += Substring_nmismatches_whole(acceptor);
+ new->ntscore = splicing_penalty + splicing_penalty + new->nmismatches_whole;
new->score = new->ntscore;
+#if 0
if (sensedir == SENSE_FORWARD) {
new->score += antistranded_penalty;
}
+#endif
new->nmismatches_bothdiff = Substring_nmismatches_bothdiff(shortexon);
- if (donor != NULL) {
- new->nmismatches_bothdiff += Substring_nmismatches_bothdiff(donor);
- }
- if (acceptor != NULL) {
- new->nmismatches_bothdiff += Substring_nmismatches_bothdiff(acceptor);
- }
+ new->nmismatches_bothdiff += Substring_nmismatches_bothdiff(donor);
+ new->nmismatches_bothdiff += Substring_nmismatches_bothdiff(acceptor);
/* new->nmismatches_refdiff = Substring_nmismatches_refdiff(donor) + Substring_nmismatches_refdiff(acceptor) + Substring_nmismatches_refdiff(shortexon); */
new->nmatches = Substring_nmatches(shortexon);
@@ -6417,23 +7373,15 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
new->nmatches += Substring_nmatches(acceptor);
}
- if (new->substring0 != NULL) {
- new->trim_left = Substring_trim_left(new->substring0);
- new->trim_left_splicep = Substring_trim_left_splicep(new->substring0);
- } else {
- new->trim_left = Substring_trim_left(new->substring1);
- new->trim_left_splicep = Substring_trim_left_splicep(new->substring1);
- }
+ substring = (Substring_T) List_head(new->substrings_1toN);
+ new->trim_left = Substring_trim_left(substring);
+ new->trim_left_splicep = Substring_trim_left_splicep(substring);
- if (new->substring2 != NULL) {
- new->trim_right = Substring_trim_right(new->substring2);
- new->trim_right_splicep = Substring_trim_right_splicep(new->substring2);
- } else {
- new->trim_right = Substring_trim_right(new->substring1);
- new->trim_right_splicep = Substring_trim_right_splicep(new->substring1);
- }
+ substring = (Substring_T) List_head(new->substrings_Nto1);
+ new->trim_right = Substring_trim_right(substring);
+ new->trim_right_splicep = Substring_trim_right_splicep(substring);
- new->penalties = splicing_penalty + splicing_penalty;
+ /* new->penalties = splicing_penalty + splicing_penalty; */
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
@@ -6463,28 +7411,90 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
Univcoord_T genomicstart, genomicend, alignstart, alignend, alignstart_trim, alignend_trim;
int nmismatches_whole, minlength;
bool trim_left_p, trim_right_p;
+ int outofbounds_start = 0, outofbounds_end = 0;
- debug0(printf("\nStage3end_new_terminal possible: endtypes %s and %s, left %llu, querystart %d, queryend %d\n",
- Endtype_string(start_endtype),Endtype_string(end_endtype),(unsigned long long) left,querystart,queryend));
+
+ debug0(printf("\nStage3end_new_terminal possible: endtypes %s and %s, left %llu, querystart %d, queryend %d, sarrayp %d\n",
+ Endtype_string(start_endtype),Endtype_string(end_endtype),(unsigned long long) left,querystart,queryend,sarrayp));
if (plusp == true) {
- if ((genomicend = left + querylength) > chrhigh) {
+ genomicstart = left;
+ genomicend = left + querylength;
+
+ alignstart = genomicstart + querystart;
+ alignend = genomicstart + queryend;
+
+ if (genomicstart < chroffset && genomicend > chrhigh) {
+ /* Out of bounds on both sides */
return (T) NULL;
- } else {
- genomicstart = left;
- alignstart = genomicstart + querystart;
- alignend = genomicstart + queryend;
+ } else if (genomicstart < chroffset) {
+ outofbounds_start = chroffset - genomicstart;
+ outofbounds_end = genomicend - chroffset;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_start = 0;
+ } else {
+ /* Consider low part to be out of bounds and stay in this chromosome */
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_end = 0;
+ }
+
+ } else if (genomicend > chrhigh) {
+ outofbounds_start = chrhigh - genomicstart;
+ outofbounds_end = genomicend - chrhigh;
+ debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_start > outofbounds_end) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_start = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ outofbounds_end = 0;
+ }
}
} else {
- if ((genomicstart = left + querylength) > chrhigh) {
+ genomicend = left;
+ genomicstart = left + querylength;
+
+ alignstart = genomicstart - querystart;
+ alignend = genomicstart - queryend;
+
+ if (genomicend < chroffset && genomicstart > chrhigh) {
+ /* Out of bounds on both sides */
return (T) NULL;
- } else {
- genomicend = left;
-
- alignstart = genomicstart - querystart;
- alignend = genomicstart - queryend;
+
+ } else if (genomicend < chroffset) {
+ outofbounds_end = chroffset - genomicend;
+ outofbounds_start = genomicstart - chroffset;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_end = 0;
+ } else {
+ /* Consider low part to be out of bounds and stay in this chromosome */
+ /* Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint); */
+ outofbounds_start = 0;
+ }
+
+ } else if (genomicstart > chrhigh) {
+ outofbounds_end = chrhigh - genomicend;
+ outofbounds_start = genomicstart - chrhigh;
+ debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+ if (outofbounds_end > outofbounds_start) {
+ /* Consider high part to be out of bounds and keep existing chromosome */
+ outofbounds_end = 0;
+ } else if (++chrnum > nchromosomes) {
+ return (T) NULL;
+ } else {
+ /* Consider low part to be out of bounds and move to next chromosome */
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ outofbounds_start = 0;
+ }
}
}
@@ -6506,12 +7516,11 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
minlength = TERMINAL_COMPUTE_MINLENGTH;
}
- if ((substring = Substring_new(/*nmismatches_whole*/0,chrnum,chroffset,chrhigh,chrlength,left,
- genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ if ((substring = Substring_new(/*nmismatches_whole*/0,chrnum,chroffset,chrhigh,chrlength,
query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
alignstart,alignend,/*genomiclength*/querylength,
- /*extraleft*/0,/*extraright*/0,/*exactp*/false,plusp,genestrand,first_read_p,
- trim_left_p,trim_right_p,minlength)) == NULL) {
+ /*exactp*/false,plusp,genestrand,first_read_p,
+ trim_left_p,trim_right_p,outofbounds_start,outofbounds_end,minlength)) == NULL) {
debug0(printf("returning NULL\n"));
return (T) NULL;
@@ -6573,16 +7582,19 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
(unsigned long long) left,(unsigned long long) genomicstart,(unsigned long long) genomicend,
(unsigned long long) chrhigh,chrnum,querystart,queryend));
- new->substring1 = substring;
- new->substring2 = (Substring_T) NULL;
- new->substring0 = (Substring_T) NULL;
- new->substring_donor = new->substring_acceptor = (Substring_T) NULL;
- new->substringD = new->substringA = (Substring_T) NULL;
- new->substring_LtoH = List_push(NULL,(void *) new->substring1);
+ new->substrings_LtoH = List_push(NULL,(void *) substring);
+ new->substrings_1toN = List_push(NULL,(void *) substring);
+ new->substrings_Nto1 = List_push(NULL,(void *) substring);
+
+ new->junctions_LtoH = (List_T) NULL;
+ new->junctions_1toN = (List_T) NULL;
+ new->junctions_Nto1 = (List_T) NULL;
+
new->pairarray = (struct Pair_T *) NULL;
+ new->cigar_tokens = (List_T) NULL;
+ new->gmap_intronp = false;
- new->deletion = (char *) NULL;
new->querylength_adj = new->querylength = querylength;
new->genomicstart = genomicstart;
new->genomicend = genomicend;
@@ -6601,6 +7613,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
new->hittype = TERMINAL;
new->genestrand = genestrand;
new->sarrayp = sarrayp;
+ new->gmap_source = GMAP_NOT_APPLICABLE;
new->improved_by_gmap_p = false;
new->chrnum = new->effective_chrnum = chrnum;
@@ -6617,8 +7630,6 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
#endif
new->nindels = 0;
- new->indel_pos = 0;
- new->indel_low = 0;
new->nmismatches_whole = Substring_nmismatches_whole(substring); /* This value was recomputed to include non-terminal end */
new->ntscore = /* terminal_penalty + */ nmismatches_whole;
@@ -6643,32 +7654,16 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
new->trim_left_splicep = Substring_trim_left_splicep(substring);
new->trim_right_splicep = Substring_trim_right_splicep(substring);
- new->penalties = 0;
+ /* new->penalties = 0; */
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
- new->start_amb_length = new->end_amb_length = 0;
- new->start_amb_prob = new->end_amb_prob = 0.0;
- new->amb_length_donor = new->amb_length_acceptor = 0;
-
- new->start_ambiguous_p = new->end_ambiguous_p = false;
- new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
- new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
- new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
- new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
- new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
- new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
- new->start_amb_probs = new->end_amb_probs = (double *) NULL;
- new->amb_probs_donor = new->amb_probs_acceptor = (double *) NULL;
- new->start_nambcoords = new->end_nambcoords = 0;
- new->nambcoords_donor = new->nambcoords_acceptor = 0;
- new->nchimera_known = 0;
- new->nchimera_novel = 0;
+ new->nsplices = 0;
new->distance = 0U;
new->shortexonA_distance = new->shortexonD_distance = 0U;
- new->sensedir = new->sensedir_nonamb = SENSE_NULL;
+ new->sensedir = SENSE_NULL;
new->paired_usedp = false;
new->paired_seenp = false;
@@ -6687,37 +7682,64 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
double ambig_prob_5, double ambig_prob_3, double min_splice_prob,
struct Pair_T *pairarray, int npairs, int nsegments, int nintrons, int nindelbreaks,
Univcoord_T left, int genomiclength, bool plusp, int genestrand, bool first_read_p,
- int querylength, Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength,
- int cdna_direction, int sensedir) {
+ char *accession, int querylength, Chrnum_T chrnum,
+ Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength,
+ int cdna_direction, int sensedir, GMAP_source_T gmap_source) {
T new;
Univcoord_T genomicstart, genomicend, genomepos;
double prob1, prob2;
+ Pair_T start, end;
+ List_T cigar_tokens;
+ bool intronp;
+ int hardclip_start, hardclip_end;
+
/* In 2012-12-20, removed statements to return NULL, because GMAP alignments seem
to be okay, at least when starting before coordinate 0 */
/* Example (when aligned to chrM at beginning of genome) (actually aligns circularly):
-GGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTG
-ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
+ GGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTG
+ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
*/
/* However, this leads to fatal bugs later, so restored these statements */
- if (Stage3_bad_stretch_p(pairarray,npairs,/*pos5*/0,/*pos3*/querylength) == true) {
+ start = &(pairarray[0]);
+ end = &(pairarray[npairs-1]);
+ hardclip_start = start->querypos;
+ hardclip_end = (querylength - 1) - end->querypos;
+
+ cigar_tokens = Pair_compute_cigar(&intronp,&hardclip_start,&hardclip_end,pairarray,npairs,querylength,
+ /*watsonp*/plusp,sensedir,/*chimera_part*/0);
+ if (Pair_tokens_cigarlength(cigar_tokens) + hardclip_start + hardclip_end != querylength) {
+ fprintf(stderr,"Could not compute a valid cigar for %s: %d + %d + %d != %d\n",
+ accession,Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+ Pair_dump_array_stderr(pairarray,npairs,/*zerobasedp*/true);
+ Pair_tokens_free(&cigar_tokens);
+#ifdef CHECK_ASSERTIONS
+ abort();
+#endif
+ return (T) NULL;
+
+ } else if (Stage3_bad_stretch_p(pairarray,npairs,/*pos5*/0,/*pos3*/querylength) == true) {
debug0(printf("Bad GMAP: bad stretch\n"));
+ Pair_tokens_free(&cigar_tokens);
return (T) NULL;
} else if (plusp == true) {
genomicstart = left;
if ((genomicend = left + genomiclength) > chrhigh) {
+ Pair_tokens_free(&cigar_tokens);
return (T) NULL;
}
if (genomicstart > genomicend) {
/* Must have started before coordinate 0 */
debug0(printf("plusp and genomicstart %llu > genomicend %llu => started before coordinate 0\n",
(unsigned long long) genomicstart,(unsigned long long) genomicend));
+ Pair_tokens_free(&cigar_tokens);
return (T) NULL;
}
} else {
if ((genomicstart = left + genomiclength) > chrhigh) {
+ Pair_tokens_free(&cigar_tokens);
return (T) NULL;
}
genomicend = left;
@@ -6725,28 +7747,33 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
/* Must have started before coordinate 0 */
debug0(printf("minusp and genomicend %llu > genomicstart %llu => started before coordinate 0\n",
(unsigned long long) genomicend,(unsigned long long) genomicstart));
+ Pair_tokens_free(&cigar_tokens);
return (T) NULL;
}
}
new = (T) MALLOC_OUT(sizeof(*new));
- debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, cdna_direction %d, sensedir %d, max_match_length %d\n",
+
+ debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, cdna_direction %d, sensedir %d, max_match_length %d, gmap_source %d\n",
new,(unsigned long long) left,(unsigned int) (genomicstart - chroffset),(unsigned int) (genomicend - chroffset),
- (unsigned long long) chrhigh,chrnum,nmismatches_whole,cdna_direction,sensedir,max_match_length));
+ (unsigned long long) chrhigh,chrnum,nmismatches_whole,cdna_direction,sensedir,max_match_length,gmap_source));
debug0(printf(" ambig_end_length_5 %d (prob %f), ambig_end_length_3 %d (prob %f)\n",ambig_end_length_5,ambig_prob_5,ambig_end_length_3,ambig_prob_3));
- new->substring1 = (Substring_T) NULL;
- new->substring2 = (Substring_T) NULL;
- new->substring0 = (Substring_T) NULL;
- new->substring_donor = new->substring_acceptor = (Substring_T) NULL;
- new->substringD = new->substringA = (Substring_T) NULL;
- new->substring_LtoH = (List_T) NULL;
+ new->substrings_LtoH = (List_T) NULL;
+ new->substrings_1toN = (List_T) NULL;
+ new->substrings_Nto1 = (List_T) NULL;
+
+ new->junctions_LtoH = (List_T) NULL;
+ new->junctions_1toN = (List_T) NULL;
+ new->junctions_Nto1 = (List_T) NULL;
+
new->pairarray = pairarray;
new->npairs = npairs;
+ new->cigar_tokens = cigar_tokens;
+ new->gmap_intronp = intronp;
new->nsegments = nsegments;
- new->deletion = (char *) NULL;
new->querylength_adj = new->querylength = querylength /* - nindels */;
new->genomicstart = genomicstart;
new->genomicend = genomicend;
@@ -6765,6 +7792,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
new->hittype = GMAP;
new->genestrand = genestrand;
new->sarrayp = false;
+ new->gmap_source = gmap_source;
new->improved_by_gmap_p = false;
new->chrnum = new->effective_chrnum = chrnum;
@@ -6776,7 +7804,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
new->gmap_nindelbreaks = nindelbreaks;
new->gmap_cdna_direction = cdna_direction;
new->gmap_nintrons = nintrons;
- new->sensedir = new->sensedir_nonamb = sensedir;
+ new->sensedir = sensedir;
#if 0
new->mapq_loglik = Substring_mapq_loglik(substring);
@@ -6785,8 +7813,6 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
#endif
new->nindels = 0;
- new->indel_pos = 0;
- new->indel_low = 0;
new->nmismatches_whole = nmismatches_whole;
new->ntscore = nmismatches_whole;
@@ -6830,12 +7856,14 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
if (new->nmatches_posttrim < querylength/2) {
debug0(printf(" nmatches %d < querylength %d/2, so returning NULL\n",
new->nmatches_posttrim,querylength));
+ Pair_tokens_free(&cigar_tokens);
FREE_OUT(new);
- return NULL;
+ return (T) NULL;
} else if (max_match_length < gmap_min_nconsecutive) {
debug0(printf(" max_match_length %d < %d, so returning NULL\n",max_match_length,gmap_min_nconsecutive));
+ Pair_tokens_free(&cigar_tokens);
FREE_OUT(new);
- return NULL;
+ return (T) NULL;
}
new->gmap_max_match_length = max_match_length;
@@ -6843,7 +7871,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
new->trim_left = Pair_querypos(&(pairarray[0])) - ambig_end_length_5;
- if (ambig_end_length_5 > 0) {
+ if ((new->gmap_start_amb_length = ambig_end_length_5) > 0) {
new->trim_left_splicep = true;
} else if (novelsplicingp == false) {
new->trim_left_splicep = false;
@@ -6866,7 +7894,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
}
new->trim_right = (querylength - 1) - Pair_querypos(&(pairarray[npairs-1])) - ambig_end_length_3;
- if (ambig_end_length_3 > 0) {
+ if ((new->gmap_end_amb_length = ambig_end_length_3) > 0) {
new->trim_right_splicep = true;
} else if (novelsplicingp == false) {
new->trim_right_splicep = false;
@@ -6888,6 +7916,7 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
}
}
+#if 0
/* new->penalties not used anyway for GMAP alignments */
#ifdef SCORE_INDELS
/* indel_penalty will be counted later */
@@ -6897,11 +7926,12 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
#endif
/* new->penalties += ambig_end_length_5/ambig_end_interval; */
/* new->penalties += ambig_end_length_3/ambig_end_interval; */
+#endif
/* new->gene_overlap = NO_KNOWN_GENE; -- initialized later when resolving multimappers */
new->tally = -1L;
- if ((new->start_amb_length = ambig_end_length_5) == 0) {
+ if (ambig_end_length_5 == 0) {
new->gmap_start_endtype = END;
} else if (ambig_splicetype_5 == DONOR || ambig_splicetype_5 == ANTIDONOR) {
new->gmap_start_endtype = AMB_DON;
@@ -6912,9 +7942,9 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
ambig_splicetype_5,ambig_end_length_5);
abort();
}
- new->start_amb_prob = ambig_prob_5;
+ /* new->start_amb_prob = ambig_prob_5; */
- if ((new->end_amb_length = ambig_end_length_3) == 0) {
+ if (ambig_end_length_3 == 0) {
new->gmap_end_endtype = END;
} else if (ambig_splicetype_3 == DONOR || ambig_splicetype_3 == ANTIDONOR) {
new->gmap_end_endtype = AMB_DON;
@@ -6925,23 +7955,15 @@ ATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAG
ambig_splicetype_3,ambig_end_length_3);
abort();
}
- new->end_amb_prob = ambig_prob_3;
-
- new->amb_length_donor = new->amb_length_acceptor = 0;
+ /* new->end_amb_prob = ambig_prob_3; */
- new->start_ambiguous_p = new->end_ambiguous_p = false;
- new->start_ambcoords = new->end_ambcoords = (Univcoord_T *) NULL;
- new->ambcoords_donor = new->ambcoords_acceptor = (Univcoord_T *) NULL;
- new->start_amb_knowni = new->end_amb_knowni = (int *) NULL;
- new->amb_knowni_donor = new->amb_knowni_acceptor = (int *) NULL;
- new->start_amb_nmismatches = new->end_amb_nmismatches = (int *) NULL;
- new->amb_nmismatches_donor = new->amb_nmismatches_acceptor = (int *) NULL;
- new->start_amb_probs = new->end_amb_probs = (double *) NULL;
- new->amb_probs_donor = new->amb_probs_acceptor = (double *) NULL;
- new->start_nambcoords = new->end_nambcoords = 0;
- new->nambcoords_donor = new->nambcoords_acceptor = 0;
- new->nchimera_known = 0;
- new->nchimera_novel = 0; /* nintrons? */
+ new->nsplices = nintrons;
+ if (ambig_end_length_5 > 0) {
+ new->nsplices += 1;
+ }
+ if (ambig_end_length_3 > 0) {
+ new->nsplices += 1;
+ }
new->distance = 0U;
new->shortexonA_distance = new->shortexonD_distance = 0;
@@ -7062,44 +8084,31 @@ Stage3pair_output_cmp (const void *a, const void *b) {
static float
Stage3end_compute_mapq (Stage3end_T this, Compress_T query_compress_fwd, Compress_T query_compress_rev,
char *quality_string, bool trim_terminals_p) {
+ List_T p;
+ Substring_T substring;
if (this == NULL) {
return 0.0;
} else if (this->hittype == GMAP) {
this->mapq_loglik = Pair_compute_mapq(this->pairarray,this->npairs,
- this->trim_left,this->trim_right,this->querylength_adj,
+ this->trim_left,this->trim_right,this->querylength,
quality_string,trim_terminals_p);
} else if (this->plusp == true) {
- this->mapq_loglik =
- Substring_compute_mapq(this->substring1,query_compress_fwd,quality_string,trim_terminals_p);
-
- if (this->substring2 != NULL) {
- this->mapq_loglik +=
- Substring_compute_mapq(this->substring2,query_compress_fwd,
- quality_string,trim_terminals_p);
- }
- if (this->substring0 != NULL) {
+ this->mapq_loglik = 0.0;
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
this->mapq_loglik +=
- Substring_compute_mapq(this->substring0,query_compress_fwd,
- quality_string,trim_terminals_p);
+ Substring_compute_mapq(substring,query_compress_fwd,quality_string,trim_terminals_p);
}
} else {
- this->mapq_loglik =
- Substring_compute_mapq(this->substring1,query_compress_rev,
- quality_string,trim_terminals_p);
-
- if (this->substring2 != NULL) {
- this->mapq_loglik +=
- Substring_compute_mapq(this->substring2,query_compress_rev,
- quality_string,trim_terminals_p);
- }
- if (this->substring0 != NULL) {
+ this->mapq_loglik = 0.0;
+ for (p = this->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
this->mapq_loglik +=
- Substring_compute_mapq(this->substring0,query_compress_rev,
- quality_string,trim_terminals_p);
+ Substring_compute_mapq(substring,query_compress_rev,quality_string,trim_terminals_p);
}
}
@@ -7109,35 +8118,147 @@ Stage3end_compute_mapq (Stage3end_T this, Compress_T query_compress_fwd, Compres
static void
-Stage3end_display_prep (Stage3end_T this, char *query, Compress_T query_compress_fwd, Compress_T query_compress_rev,
- Genome_T genome) {
- char *deletion_ignore;
+Stage3end_display_prep (Stage3end_T this, char *queryuc_ptr, char *queryrc,
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ int amb_resolve, bool first_read_p) {
+ List_T p, q;
+ Substring_T substring, anchor;
+ Junction_T pre_junction, post_junction, junction;
+ int extraleft, extraright, extralow, extrahigh;
+ Univcoord_T left, ignore;
+ double donor_prob, acceptor_prob;
+ int type;
+
if (this != NULL) {
debug0(printf("Doing a display prep of end %p\n",this));
if (this->hittype == GMAP) {
this->nmismatches_refdiff = this->nmismatches_bothdiff;
- } else if (this->hittype == DELETION) {
- this->nmismatches_refdiff =
- Substring_display_prep(&this->deletion,this->substring1,query,query_compress_fwd,query_compress_rev,
- genome,/*deletion_pos*/this->indel_pos,
- /*deletion_length*/this->nindels);
} else {
- this->nmismatches_refdiff =
- Substring_display_prep(&deletion_ignore,this->substring1,query,query_compress_fwd,query_compress_rev,
- genome,/*deletion_pos*/-1,/*deletion_length*/0);
- }
+ /* Resolve ambiguous end */
+ if (amb_resolve >= 0) {
+ if (first_read_p == true) {
+ substring = (Substring_T) List_head(this->substrings_Nto1);
+ anchor = (Substring_T) List_head(List_next(this->substrings_Nto1));
+ junction = (Junction_T) List_head(this->junctions_Nto1);
+ left = Substring_set_unambiguous(&donor_prob,&acceptor_prob,&ignore,&this->genomicend,substring,amb_resolve);
+ if (this->plusp == true) {
+ Junction_set_unambiguous(junction,left - Substring_left(anchor),donor_prob,acceptor_prob);
+ } else {
+ Junction_set_unambiguous(junction,Substring_left(anchor) - left,donor_prob,acceptor_prob);
+ }
- if (this->substring2 != NULL) {
- this->nmismatches_refdiff +=
- Substring_display_prep(&deletion_ignore,this->substring2,query,query_compress_fwd,query_compress_rev,
- genome,/*deletion_pos*/-1,/*deletion_length*/0);
- }
- if (this->substring0 != NULL) {
- this->nmismatches_refdiff +=
- Substring_display_prep(&deletion_ignore,this->substring0,query,query_compress_fwd,query_compress_rev,
- genome,/*deletion_pos*/-1,/*deletion_length*/0);
+ } else {
+ substring = (Substring_T) List_head(this->substrings_1toN);
+ anchor = (Substring_T) List_head(List_next(this->substrings_1toN));
+ junction = (Junction_T) List_head(this->junctions_1toN);
+ left = Substring_set_unambiguous(&donor_prob,&acceptor_prob,&this->genomicstart,&ignore,substring,amb_resolve);
+ if (this->plusp == true) {
+ Junction_set_unambiguous(junction,Substring_left(anchor) - left,donor_prob,acceptor_prob);
+ } else {
+ Junction_set_unambiguous(junction,left - Substring_left(anchor),donor_prob,acceptor_prob);
+ }
+ }
+ }
+
+ this->nmismatches_refdiff = 0;
+
+ /* First segments */
+ /* For operations on substrings, proceed in 1toN order, not LtoH order */
+ substring = (Substring_T) List_head(this->substrings_1toN);
+ extraleft = Substring_querystart(substring); /* terminal start */
+
+ if (List_length(this->substrings_1toN) == 1) {
+ post_junction = (Junction_T) NULL;
+ extraright = this->querylength - Substring_queryend(substring); /* terminal end */
+ } else {
+ post_junction = (Junction_T) List_head(this->junctions_1toN);
+ if (Junction_type(post_junction) == SPLICE_JUNCTION) {
+ extraright = 2;
+ } else {
+ extraright = 0;
+ }
+ }
+
+ if (Substring_ambiguous_p(substring) == true) {
+ } else {
+ this->nmismatches_refdiff +=
+ Substring_display_prep(substring,queryuc_ptr,this->querylength,
+ extraleft,extraright,query_compress_fwd,query_compress_rev,
+ genome);
+ }
+
+ if ((p = List_next(this->substrings_1toN)) == NULL) {
+ /* No middle segments */
+ } else {
+ for (q = List_next(this->junctions_1toN); q != NULL; p = List_next(p), q = List_next(q)) {
+ /* Middle segments */
+ pre_junction = post_junction;
+ post_junction = List_head(q);
+#if 0
+ extraleft = 0;
+ if ((type = Junction_type(pre_junction)) == INS_JUNCTION) {
+ ninsertions += Junction_nindels(pre_junction);
+ } else if (type == DEL_JUNCTION) {
+ ndeletions += Junction_nindels(pre_junction);
+ } else if (type == SPLICE_JUNCTION) {
+ extraleft = 2;
+ }
+#else
+ if (Junction_type(pre_junction) == SPLICE_JUNCTION) {
+ extraleft = 2;
+ } else {
+ extraleft = 0;
+ }
+#endif
+ if (Junction_type(post_junction) == SPLICE_JUNCTION) {
+ extraright = 2;
+ } else {
+ extraright = 0;
+ }
+
+ substring = (Substring_T) List_head(p);
+ if (Substring_ambiguous_p(substring) == true) {
+ /* Skip */
+ } else {
+ this->nmismatches_refdiff +=
+ Substring_display_prep(substring,queryuc_ptr,this->querylength,
+ extraleft,extraright,query_compress_fwd,query_compress_rev,
+ genome);
+ }
+ }
+
+ /* Last segment */
+ pre_junction = post_junction;
+#if 0
+ extraleft = 0;
+ if ((type = Junction_type(pre_junction)) == INS_JUNCTION) {
+ ninsertions += Junction_nindels(pre_junction);
+ } else if (type == DEL_JUNCTION) {
+ ndeletions += Junction_nindels(pre_junction);
+ } else if (type == SPLICE_JUNCTION) {
+ extraleft = 2;
+ }
+#else
+ if (Junction_type(pre_junction) == SPLICE_JUNCTION) {
+ extraleft = 2;
+ } else {
+ extraleft = 0;
+ }
+#endif
+ substring = (Substring_T) List_head(p);
+ extraright = this->querylength - Substring_queryend(substring);
+
+ if (Substring_ambiguous_p(substring) == true) {
+ /* Skip */
+ } else {
+ this->nmismatches_refdiff +=
+ Substring_display_prep(substring,queryuc_ptr,this->querylength,
+ extraleft,extraright,query_compress_fwd,query_compress_rev,
+ genome);
+ }
+ }
}
}
return;
@@ -7245,9 +8366,9 @@ Stage3end_sort_by_paired_seenp (List_T hits) {
Stage3end_T *
Stage3end_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
Stage3end_T *stage3array, int maxpaths, Shortread_T queryseq,
+ char *queryuc_ptr, char *queryrc,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
- Genome_T genome, char *quality_string, bool displayp) {
- char *query;
+ char *quality_string, bool displayp) {
float maxlik, loglik;
float total, q; /* For Bayesian mapq calculation */
int compute_npaths;
@@ -7268,9 +8389,8 @@ Stage3end_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
stage3array[0]->absmq_score = MAPQ_MAXIMUM_SCORE;
if (displayp == true) {
- query = Shortread_fullpointer_uc(queryseq);
- Stage3end_display_prep(stage3array[0],query,query_compress_fwd,query_compress_rev,
- genome);
+ Stage3end_display_prep(stage3array[0],queryuc_ptr,queryrc,query_compress_fwd,query_compress_rev,
+ /*amb_resolve*/-1,/*first_read_p*/true);
}
*first_absmq = stage3array[0]->absmq_score;
*second_absmq = 0;
@@ -7366,10 +8486,9 @@ Stage3end_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
if (displayp == true) {
/* Prepare for display */
- query = Shortread_fullpointer_uc(queryseq);
for (i = 0; i < compute_npaths; i++) {
- Stage3end_display_prep(stage3array[i],query,query_compress_fwd,query_compress_rev,
- genome);
+ Stage3end_display_prep(stage3array[i],queryuc_ptr,queryrc,query_compress_fwd,query_compress_rev,
+ /*amb_resolve*/-1,/*first_read_p*/true);
}
}
@@ -7406,6 +8525,8 @@ insertlength_expected (int insertlength) {
/* For concordant ends */
static Chrpos_T
pair_insert_length (Stage3end_T hit5, Stage3end_T hit3) {
+ List_T p, q;
+ Substring_T substring5, substring3;
if (hit5->plusp != hit3->plusp) {
debug10(printf("pair_insert_length: hit5->plusp %d != hit3->plusp %d, so returning 0\n",
@@ -7414,59 +8535,41 @@ pair_insert_length (Stage3end_T hit5, Stage3end_T hit3) {
}
if (hit5->chrnum != 0 && hit3->chrnum != 0) {
- if (Substring_overlap_p(hit5->substring1,hit3->substring1)) {
- return Substring_insert_length(hit5->substring1,hit3->substring1);
- } else if (hit5->substring2 != NULL && Substring_overlap_p(hit5->substring2,hit3->substring1)) {
- return Substring_insert_length(hit5->substring2,hit3->substring1);
- } else if (hit5->substring0 != NULL && Substring_overlap_p(hit5->substring0,hit3->substring1)) {
- return Substring_insert_length(hit5->substring0,hit3->substring1);
- }
-
- if (hit3->substring2 != NULL) {
- if (Substring_overlap_p(hit5->substring1,hit3->substring2)) {
- return Substring_insert_length(hit5->substring1,hit3->substring2);
- } else if (hit5->substring2 != NULL && Substring_overlap_p(hit5->substring2,hit3->substring2)) {
- return Substring_insert_length(hit5->substring2,hit3->substring2);
- } else if (hit5->substring0 != NULL && Substring_overlap_p(hit5->substring0,hit3->substring2)) {
- return Substring_insert_length(hit5->substring0,hit3->substring2);
- }
- }
-
- if (hit3->substring0 != NULL) {
- if (Substring_overlap_p(hit5->substring1,hit3->substring0)) {
- return Substring_insert_length(hit5->substring1,hit3->substring0);
- } else if (hit5->substring2 != NULL && Substring_overlap_p(hit5->substring2,hit3->substring0)) {
- return Substring_insert_length(hit5->substring2,hit3->substring0);
- } else if (hit5->substring0 != NULL && Substring_overlap_p(hit5->substring0,hit3->substring0)) {
- return Substring_insert_length(hit5->substring0,hit3->substring0);
+ for (q = hit3->substrings_1toN; q != NULL; q = List_next(q)) {
+ substring3 = (Substring_T) List_head(q);
+ for (p = hit5->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring5 = (Substring_T) List_head(p);
+ if (Substring_overlap_p(substring5,substring3)) {
+ return Substring_insert_length(substring5,substring3);
+ }
}
}
}
/* No overlap found between any combination of substrings */
if (hit5->plusp == true) {
- if (hit5->genomicend > hit3->genomicstart + hit5->querylength_adj + hit3->querylength_adj) {
- debug10(printf("pair_insert_length: no overlap found, and %llu - %llu + %d + %d < 0, so returning 0\n",
- (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicend,
- hit5->querylength_adj,hit3->querylength_adj));
+ if (hit5->genomicend > hit3->genomicstart + hit5->querylength + hit3->querylength) {
+ debug10(printf("pair_insert_length: no overlap found, and %u - %u + %d + %d < 0, so returning 0\n",
+ hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset,
+ hit5->querylength,hit3->querylength));
return 0;
} else {
- debug10(printf("pair_insert_length: no overlap found, so returning %llu - %llu + %d + %d\n",
- (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicend,
- hit5->querylength_adj,hit3->querylength_adj));
+ debug10(printf("pair_insert_length: no overlap found, so returning %u - %u + %d + %d\n",
+ hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset,
+ hit5->querylength,hit3->querylength));
}
- return hit3->genomicstart - hit5->genomicend + hit5->querylength_adj + hit3->querylength_adj;
+ return hit3->genomicstart - hit5->genomicend + hit5->querylength + hit3->querylength;
} else {
- if (hit3->genomicstart > hit5->genomicend + hit5->querylength_adj + hit3->querylength_adj) {
- debug10(printf("pair_insert_length: no overlap found, and %llu - %llu + %d + %d < 0, so returning 0\n",
- (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicstart,
- hit5->querylength_adj,hit3->querylength_adj));
+ if (hit3->genomicstart > hit5->genomicend + hit5->querylength + hit3->querylength) {
+ debug10(printf("pair_insert_length: no overlap found, and %u - %u + %d + %d < 0, so returning 0\n",
+ hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset,
+ hit5->querylength,hit3->querylength));
return 0;
} else {
- debug10(printf("pair_insert_length: no overlap found, so returning %llu - %llu + %d + %d\n",
- (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicstart,
- hit5->querylength_adj,hit3->querylength_adj));
- return hit5->genomicend - hit3->genomicstart + hit5->querylength_adj + hit3->querylength_adj;
+ debug10(printf("pair_insert_length: no overlap found, so returning %u - %u + %d + %d\n",
+ hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset,
+ hit5->querylength,hit3->querylength));
+ return hit5->genomicend - hit3->genomicstart + hit5->querylength + hit3->querylength;
}
}
}
@@ -7482,11 +8585,11 @@ pair_insert_length_unpaired (Stage3end_T hit5, Stage3end_T hit3) {
hit5->plusp,hit3->plusp));
return 0;
} else if (hit5->high < hit3->low) {
- return hit3->low - hit5->high + hit5->querylength_adj + hit3->querylength_adj;
+ return hit3->low - hit5->high + hit5->querylength + hit3->querylength;
} else if (hit3->high < hit5->low) {
- return hit5->low - hit3->high + hit5->querylength_adj + hit3->querylength_adj;
+ return hit5->low - hit3->high + hit5->querylength + hit3->querylength;
} else {
- return hit5->querylength_adj + hit3->querylength_adj;
+ return hit5->querylength + hit3->querylength;
}
}
@@ -7494,9 +8597,9 @@ pair_insert_length_unpaired (Stage3end_T hit5, Stage3end_T hit3) {
Stage3end_T *
Stage3end_eval_and_sort_guided (int *npaths, int *first_absmq, int *second_absmq, Stage3end_T guide,
Stage3end_T *stage3array, int maxpaths, Shortread_T queryseq,
+ char *queryuc_ptr, char *queryrc,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
- Genome_T genome, char *quality_string, bool displayp) {
- char *query;
+ char *quality_string, bool displayp) {
float maxlik, loglik;
float total, q; /* For Bayesian mapq calculation */
int compute_npaths;
@@ -7517,9 +8620,8 @@ Stage3end_eval_and_sort_guided (int *npaths, int *first_absmq, int *second_absmq
stage3array[0]->absmq_score = MAPQ_MAXIMUM_SCORE;
if (displayp == true) {
- query = Shortread_fullpointer_uc(queryseq);
- Stage3end_display_prep(stage3array[0],query,query_compress_fwd,query_compress_rev,
- genome);
+ Stage3end_display_prep(stage3array[0],queryuc_ptr,queryrc,query_compress_fwd,query_compress_rev,
+ /*amb_resolve*/-1,/*first_read_p*/true);
}
*first_absmq = stage3array[0]->absmq_score;
*second_absmq = 0;
@@ -7620,10 +8722,9 @@ Stage3end_eval_and_sort_guided (int *npaths, int *first_absmq, int *second_absmq
if (displayp == true) {
/* Prepare for display */
- query = Shortread_fullpointer_uc(queryseq);
for (i = 0; i < compute_npaths; i++) {
- Stage3end_display_prep(stage3array[i],query,query_compress_fwd,query_compress_rev,
- genome);
+ Stage3end_display_prep(stage3array[i],queryuc_ptr,queryrc,query_compress_fwd,query_compress_rev,
+ /*amb_resolve*/-1,/*first_read_p*/true);
}
}
@@ -7655,14 +8756,13 @@ static List_T
Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level, int suboptimal_mismatches,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
int querylength, bool keep_gmap_p, bool finalp) {
- List_T optimal = NULL, p;
+ List_T optimal = NULL, p, q;
T hit;
+ Substring_T substring;
int n;
int minscore = querylength;
int max_nmatches = 0, max_nmatches_posttrim = 0;
- int trim_left, trim_right;
- int min_trim_left = querylength, min_trim_right = querylength;
- int max_trim_left_terminal = 0, max_trim_right_terminal = 0;
+ int trim_left = querylength, trim_right = querylength;
int nindelbreaks;
#ifdef TRANSLOC_SPECIAL
@@ -7709,122 +8809,70 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist, int cutoff_level
hittype_string(hit->hittype),hit->trim_left,hit->trim_right));
if (hit->hittype == TERMINAL) {
/* Don't allow terminals to set trims */
-#if 0
- if (hit->trim_left > max_trim_left_terminal) {
- max_trim_left_terminal = hit->trim_left;
- }
- if (hit->trim_right > max_trim_right_terminal) {
- max_trim_right_terminal = hit->trim_right;
- }
-#endif
+#if 0
} else if ((hit->hittype == INSERTION || hit->hittype == DELETION) &&
- (hit->indel_pos < 15 || hit->indel_pos > hit->querylength_adj - 15)) {
+ (hit->indel_pos < 15 || hit->indel_pos > hit->querylength - 15)) {
/* Don't allow end indels to set trims */
+#endif
} else {
if (hit->trim_left_splicep == true) {
- if (hit->trim_left > max_trim_left_terminal) {
- max_trim_left_terminal = hit->trim_left;
- }
- } else if (hit->trim_left < min_trim_left) {
- min_trim_left = hit->trim_left;
+ /* Skip */
+ } else if (hit->trim_left < trim_left) {
+ trim_left = hit->trim_left;
}
if (hit->trim_right_splicep == true) {
- if (hit->trim_right > max_trim_right_terminal) {
- max_trim_right_terminal = hit->trim_right;
- }
- } else if (hit->trim_right < min_trim_right) {
- min_trim_right = hit->trim_right;
+ /* Skip */
+ } else if (hit->trim_right < trim_right) {
+ trim_right = hit->trim_right;
}
}
}
- if (min_trim_left == querylength) {
- trim_left = max_trim_left_terminal;
- } else {
- trim_left = (max_trim_left_terminal > min_trim_left) ? max_trim_left_terminal : min_trim_left;
+ if (trim_left == querylength) {
+ trim_left = 0;
}
- if (min_trim_right == querylength) {
- trim_right = max_trim_right_terminal;
- } else {
- trim_right = (max_trim_right_terminal > min_trim_right) ? max_trim_right_terminal : min_trim_right;
+ if (trim_right == querylength) {
+ trim_right = 0;
}
- debug4(printf("non-terminals: min_trim_left: %d, min_trim_right %d\n",min_trim_left,min_trim_right));
- debug4(printf("prefinal-terminals: max_trim_left: %d, max_trim_right %d\n",
- max_trim_left_terminal,max_trim_right_terminal));
debug4(printf("trim_left: %d, trim_right %d\n",trim_left,trim_right));
for (p = hitlist; p != NULL; p = p->rest) {
hit = (T) p->first;
- if (hit->hittype == TERMINAL && finalp == false) {
- /* Ignore */
- hit->score_eventrim = 0;
- } else if (hit->hittype == GMAP) {
- hit->score_eventrim = 0; /* was hit->penalties */
+ if (hit->hittype == GMAP) {
debug4(printf("score GMAP:"));
-#if 0
- if (Stage3end_bad_stretch_p(hit,query_compress_fwd,query_compress_rev) == true) {
- hit->score_eventrim += 2;
- debug4(printf(" bad stretch 2."));
- }
-#endif
-
-#if 0
- if (0 && hit->trim_left <= 8) {
- /* Ignore small trims */
- } else if (hit->trim_left > trim_left) {
- hit->score_eventrim += hit->trim_left - trim_left;
- debug4(printf(" add trim left (%d - %d).",hit->trim_left,trim_left));
- }
- if (0 && hit->trim_right <= 8) {
- /* Ignore small trims */
- } else if (hit->trim_right > trim_right) {
- hit->score_eventrim += hit->trim_right - trim_right;
- debug4(printf(" add trim right (%d - %d).",hit->trim_right,trim_right));
+ hit->score_eventrim = Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
+ trim_left,trim_right,start_amb_length(hit),end_amb_length(hit),
+ hit->querylength);
+ debug4(printf(" add mismatches %d.",hit->score_eventrim));
+ if (start_amb_length(hit) > 0) {
+ debug4(printf(" add penalty for start amb %d.",amb_penalty));
+ hit->score_eventrim += amb_penalty;
+ }
+ if (end_amb_length(hit) > 0) {
+ debug4(printf(" add penalty for end amb %d.",amb_penalty));
+ hit->score_eventrim += amb_penalty;
}
-#endif
-
- hit->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
- trim_left,trim_right,hit->start_amb_length,hit->end_amb_length,
- hit->querylength_adj);
- debug4(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit->pairarray,hit->npairs,
- trim_left,trim_right,hit->start_amb_length,hit->end_amb_length,
- hit->querylength_adj)));
+
#ifdef SCORE_INDELS
hit->score_eventrim += indel_penalty_middle * nindelbreaks;
#endif
- if (hit->start_amb_prob < 0.9) {
- hit->score_eventrim += hit->start_amb_length / ambig_end_interval;
- debug4(printf(" add amb start %d/%d.",hit->start_amb_length,ambig_end_interval));
- }
- if (hit->end_amb_prob < 0.9) {
- hit->score_eventrim += hit->end_amb_length / ambig_end_interval;
- debug4(printf(" add amb end %d/%d.",hit->end_amb_length,ambig_end_interval));
- }
debug4(printf(" RESULT: %d\n",hit->score_eventrim));
} else {
+ hit->score_eventrim = 0; /* was hit->penalties */
debug4(printf("score OTHER:"));
- hit->score_eventrim = hit->penalties;
- debug4(printf(" penalties %d.",hit->penalties));
-
- hit->score_eventrim += Substring_count_mismatches_region(hit->substring0,trim_left,trim_right,
- query_compress_fwd,query_compress_rev);
- debug4(printf(" substring 0 %d.",Substring_count_mismatches_region(hit->substring0,trim_left,trim_right,
- query_compress_fwd,query_compress_rev)));
-
- hit->score_eventrim += Substring_count_mismatches_region(hit->substring1,trim_left,trim_right,
- query_compress_fwd,query_compress_rev);
- debug4(printf(" substring 1 %d.",Substring_count_mismatches_region(hit->substring1,trim_left,trim_right,
- query_compress_fwd,query_compress_rev)));
- hit->score_eventrim += Substring_count_mismatches_region(hit->substring2,trim_left,trim_right,
- query_compress_fwd,query_compress_rev);
- debug4(printf(" substring 2 %d.",Substring_count_mismatches_region(hit->substring2,trim_left,trim_right,
+ for (q = hit->substrings_1toN; q != NULL; q = List_next(q)) {
+ substring = (Substring_T) List_head(q);
+ hit->score_eventrim += Substring_count_mismatches_region(substring,trim_left,trim_right,
+ query_compress_fwd,query_compress_rev);
+ debug4(printf(" substring %d.",Substring_count_mismatches_region(substring,trim_left,trim_right,
query_compress_fwd,query_compress_rev)));
+ }
#ifdef SCORE_INDELS
/* Needs to match GMAP scoring */
@@ -7963,6 +9011,8 @@ Stage3end_optimal_score (List_T hitlist, int cutoff_level, int suboptimal_mismat
static void
unalias_circular (T hit) {
Chrpos_T chrlength = hit->chrlength;
+ List_T p;
+ Substring_T substring;
debug12(printf("Calling unalias_circular\n"));
assert(hit->alias == +1);
@@ -7970,9 +9020,10 @@ unalias_circular (T hit) {
Pair_unalias_circular(hit->pairarray,hit->npairs,chrlength);
} else {
- Substring_unalias_circular(hit->substring0);
- Substring_unalias_circular(hit->substring1);
- Substring_unalias_circular(hit->substring2);
+ for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ Substring_unalias_circular(substring);
+ }
}
/* Doesn't fix hitpair->low and hitpair->high */
@@ -8038,9 +9089,9 @@ Stage3end_remove_circular_alias (List_T hitlist) {
if (
#ifdef SOFT_CLIPS_AVOID_CIRCULARIZATION
- hit->low + trim >= hit->chroffset + hit->chrlength
+ hit->low + trim >= hit->chroffset + hit->chrlength
#else
- hit->low >= hit->chroffset + hit->chrlength
+ hit->low >= hit->chroffset + hit->chrlength
#endif
) {
/* All in circular alias */
@@ -8100,6 +9151,8 @@ duplicate_sort_cmp (const void *a, const void *b) {
T x = * (T *) a;
T y = * (T *) b;
Univcoord_T x_genomicstart, x_genomicend, y_genomicstart, y_genomicend;
+ List_T p, q;
+ Substring_T x_substring, y_substring;
x_genomicstart = normalize_coord(x->genomicstart,x->alias,x->chrlength);
x_genomicend = normalize_coord(x->genomicend,x->alias,x->chrlength);
@@ -8120,22 +9173,37 @@ duplicate_sort_cmp (const void *a, const void *b) {
return -1;
} else if (x->genomicend > y->genomicend) {
return +1;
- } else if ((cmp = Substring_compare(x->substring1,y->substring1,x->alias,y->alias,x->chrlength,y->chrlength)) != 0) {
- return cmp;
- } else if ((cmp = Substring_compare(x->substring2,y->substring2,x->alias,y->alias,x->chrlength,y->chrlength)) != 0) {
- return cmp;
- } else if ((cmp = Substring_compare(x->substring0,y->substring0,x->alias,y->alias,x->chrlength,y->chrlength)) != 0) {
- return cmp;
- } else if (x->indel_low < y->indel_low) {
- return -1;
- } else if (y->indel_low < x->indel_low) {
- return +1;
- } else if (x->sarrayp == true && y->sarrayp == false) {
- return -1;
- } else if (x->sarrayp == false && y->sarrayp == true) {
- return +1;
+
} else {
- return 0;
+ for (p = x->substrings_1toN, q = y->substrings_1toN; p != NULL && q != NULL; p = List_next(p), q = List_next(q)) {
+ x_substring = (Substring_T) List_head(p);
+ y_substring = (Substring_T) List_head(q);
+ if ((cmp = Substring_compare(x_substring,y_substring,x->alias,y->alias,x->chrlength,y->chrlength)) != 0) {
+ return cmp;
+ }
+ }
+ if (p == NULL && q != NULL) {
+ return -1;
+ } else if (p != NULL && q == NULL) {
+ return +1;
+ }
+
+#if 0
+ /* Need to change to search on junctions */
+ if (x->indel_low < y->indel_low) {
+ return -1;
+ } else if (y->indel_low < x->indel_low) {
+ return +1;
+ }
+#endif
+
+ if (x->sarrayp == true && y->sarrayp == false) {
+ return -1;
+ } else if (x->sarrayp == false && y->sarrayp == true) {
+ return +1;
+ } else {
+ return 0;
+ }
}
}
@@ -8145,6 +9213,8 @@ duplicate_equiv_cmp (const void *a, const void *b) {
int cmp;
T x = * (T *) a;
T y = * (T *) b;
+ List_T p, q;
+ Substring_T x_substring, y_substring;
Univcoord_T x_genomicstart, x_genomicend, y_genomicstart, y_genomicend;
@@ -8168,27 +9238,21 @@ duplicate_equiv_cmp (const void *a, const void *b) {
return -1;
} else if (x_genomicend > y_genomicend) {
return +1;
- } else if ((cmp = Substring_compare(x->substring1,y->substring1,x->alias,y->alias,x->chrlength,y->chrlength)) != 0) {
- return cmp;
- } else if ((cmp = Substring_compare(x->substring2,y->substring2,x->alias,y->alias,x->chrlength,y->chrlength)) != 0) {
- return cmp;
- } else if ((cmp = Substring_compare(x->substring0,y->substring0,x->alias,y->alias,x->chrlength,y->chrlength)) != 0) {
- return cmp;
-#if 0
- } else if (x->indel_low < y->indel_low) {
- return -1;
- } else if (y->indel_low < x->indel_low) {
- return +1;
-#endif
-#if 0
- /* Want to sort by sarrayp, but still consider them equal */
- } else if (x->sarrayp == true && y->sarrayp == false) {
- return -1;
- } else if (x->sarrayp == false && y->sarrayp == true) {
- return +1;
-#endif
} else {
- return 0;
+ for (p = x->substrings_1toN, q = y->substrings_1toN; p != NULL && q != NULL; p = List_next(p), q = List_next(q)) {
+ x_substring = (Substring_T) List_head(p);
+ y_substring = (Substring_T) List_head(q);
+ if ((cmp = Substring_compare(x_substring,y_substring,x->alias,y->alias,x->chrlength,y->chrlength)) != 0) {
+ return cmp;
+ }
+ }
+ if (p == NULL && q != NULL) {
+ return -1;
+ } else if (p != NULL && q == NULL) {
+ return +1;
+ } else {
+ return 0;
+ }
}
}
@@ -8378,30 +9442,14 @@ Stage3end_mark_ambiguous_splices (bool *ambiguousp, List_T hitlist) {
#ifdef DEBUG7
static void
Stage3end_print_substrings (Stage3end_T hit) {
- Substring_print_ends(hit->substring1,hit->chrnum);
- Substring_print_ends(hit->substring2,hit->chrnum);
- Substring_print_ends(hit->substring0,hit->chrnum);
- return;
-}
-#endif
-
-
-#if 0
-static bool
-Stage3end_equal_p (Stage3end_T hit5, Stage3end_T hit3) {
-
- if (Substring_equal_p(hit5->substring1,hit3->substring1) == false) {
- return false;
-
- } else if (Substring_equal_p(hit5->substring2,hit3->substring2) == false) {
- return false;
-
- } else if (Substring_equal_p(hit5->substring0,hit3->substring0) == false) {
- return false;
+ List_T p;
+ Substring_T substring;
- } else {
- return true;
+ for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ Substring_print_ends(substring,hit->chrnum);
}
+ return;
}
#endif
@@ -8418,7 +9466,7 @@ Stage3end_remove_duplicates (List_T hitlist) {
bool *eliminate, eliminatep;
- debug7(printf("Entered Stage3end_remove_duplicates with %d hits\n",n));
+ debug7(printf("Entered Stage3end_remove_duplicates with %d hits\n",List_length(hitlist)));
if ((n = List_length(hitlist)) == 0) {
return NULL;
} else {
@@ -8473,8 +9521,8 @@ Stage3end_remove_duplicates (List_T hitlist) {
debug7(printf(" #%d equivalent to #%d and both used (%p and %p)\n",k,usedi,hits[k],hits[usedi]));
#if 0
/* This doesn't matter anymore. Example from NM_001033853:
-TTGCCCTTGGTCACCCCGATGACGTCGATCATCTCATCCTGCCCAAACACTTGGTTCACAGGTACCTGCTGCTCA
-AGTGATGAATCCAAGAGGCGTTTCTATAAGAATTGGCATAAATCTAAGAAGAAGGCCCACCTGATGGAGATCCAG */
+ TTGCCCTTGGTCACCCCGATGACGTCGATCATCTCATCCTGCCCAAACACTTGGTTCACAGGTACCTGCTGCTCA
+ AGTGATGAATCCAAGAGGCGTTTCTATAAGAATTGGCATAAATCTAAGAAGAAGGCCCACCTGATGGAGATCCAG */
fprintf(stderr,"Duplicates of Stage3end_T both seen\n");
#if 0
/* No longer providing queryseq1 and queryseq2 */
@@ -8531,14 +9579,14 @@ AGTGATGAATCCAAGAGGCGTTTCTATAAGAATTGGCATAAATCTAAGAAGAAGGCCCACCTGATGGAGATCCAG */
for (i = n-1; i >= 0; i--) {
x = hits[i];
if (eliminate[i] == false) {
- debug7(printf(" Keeping #%d:%u..%u, nmatches %d (nindels %d, indel_pos %d, distance %u, chrnum %d) (plusp = %d)\n",
+ debug7(printf(" Keeping #%d:%u..%u, nmatches %d (nindels %d, distance %u, chrnum %d) (plusp = %d)\n",
x->chrnum,x->genomicstart - x->chroffset,x->genomicend - x->chroffset,
- x->nmatches,x->nindels,x->indel_pos,x->distance,x->chrnum,x->plusp));
+ x->nmatches,x->nindels,x->distance,x->chrnum,x->plusp));
hitlist = List_push(hitlist,x);
} else {
- debug7(printf(" Eliminating #%d:%u..%u, nmatches %d (nindels %d, indel_pos %d, distance %u, chrnum %d) (plusp = %d)\n",
+ debug7(printf(" Eliminating #%d:%u..%u, nmatches %d (nindels %d, distance %u, chrnum %d) (plusp = %d)\n",
x->chrnum,x->genomicstart - x->chroffset,x->genomicend - x->chroffset,
- x->nmatches,x->nindels,x->indel_pos,x->distance,x->chrnum,x->plusp));
+ x->nmatches,x->nindels,x->distance,x->chrnum,x->plusp));
Stage3end_free(&x);
}
}
@@ -8643,23 +9691,35 @@ hit_sort_cmp (const void *a, const void *b) {
return -1;
} else if (y->nmatches_posttrim > x->nmatches_posttrim) {
return +1;
- } else if (x->nchimera_novel < y->nchimera_novel) {
- return -1;
- } else if (y->nchimera_novel < x->nchimera_novel) {
- return +1;
#endif
- } else if (x->nchimera_known > y->nchimera_known) {
+
+#if 0
+ } else if (x->nsplices < y->nsplices) {
return -1;
- } else if (y->nchimera_known > x->nchimera_known) {
+ } else if (y->nsplices < x->nsplices) {
return +1;
+#endif
+
} else if (x->hittype < y->hittype) {
return -1;
} else if (y->hittype < x->hittype) {
return +1;
+
+#if 0
+ } else if (y->start_amb_length + y->end_amb_length == 0 &&
+ x->start_amb_length + x->end_amb_length > 0) {
+ return -1;
+ } else if (x->start_amb_length + x->end_amb_length == 0 &&
+ y->start_amb_length + y->end_amb_length > 0) {
+ return +1;
+#endif
+
+#if 0
} else if (x->indel_low < y->indel_low) {
return -1;
} else if (y->indel_low < x->indel_low) {
return +1;
+#endif
} else if (x->sarrayp == true && y->sarrayp == false) {
return -1;
} else if (x->sarrayp == false && y->sarrayp == true) {
@@ -8709,17 +9769,21 @@ hit_equiv_cmp (Stage3end_T x, Stage3end_T y) {
#endif
#if 0
- /* Causes GMAP and non-GMAP to not be recognized as equivalent */
- } else if (x->nchimera_novel < y->nchimera_novel) {
+ /* Causes hits to not be recognized as equivalent */
+ } else if (x->nsplices < y->nsplices) {
return -1;
- } else if (y->nchimera_novel < x->nchimera_novel) {
+ } else if (y->nsplices < x->nsplices) {
return +1;
#endif
- } else if (x->nchimera_known > y->nchimera_known) {
+#if 0
+ } else if (y->start_amb_length + y->end_amb_length == 0 &&
+ x->start_amb_length + x->end_amb_length > 0) {
return -1;
- } else if (y->nchimera_known > x->nchimera_known) {
+ } else if (x->start_amb_length + x->end_amb_length == 0 &&
+ y->start_amb_length + y->end_amb_length > 0) {
return +1;
+#endif
#if 0
} else if (x->indel_low < y->indel_low) {
@@ -8805,29 +9869,30 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
#endif
#if 0
- } else if (hit->nchimera_novel > best_hit->nchimera_novel) {
- debug7(printf(" => %d loses by nchimera_novel\n",k));
+ } else if (hit->nsplices > best_hit->nsplices) {
+ debug7(printf(" => %d loses by nsplices: %d > %d in best\n",k,hit->nsplices,best_hit->nsplices));
return -1;
- } else if (hit->nchimera_novel < best_hit->nchimera_novel) {
- debug7(printf(" => %d wins by nchimera_novel\n",k));
+ } else if (hit->nsplices < best_hit->nsplices) {
+ debug7(printf(" => %d wins by nsplices: %d < %d in best\n",k,hit->nsplices,best_hit->nsplices));
return +1;
#endif
-
- } else if (hit->nchimera_known < best_hit->nchimera_known) {
- debug7(printf(" => %d loses by nchimera_known %d < %d\n",
- k,hit->nchimera_known,best_hit->nchimera_known));
- return -1;
- } else if (hit->nchimera_known > best_hit->nchimera_known) {
- debug7(printf(" => %d wins by nchimera_known\n",k));
- return +1;
-#if 0
} else if (hit->hittype > best_hit->hittype) {
debug7(printf(" => %d loses by hittype\n",k));
return -1;
} else if (hit->hittype < best_hit->hittype) {
debug7(printf(" => %d wins by hittype\n",k));
return +1;
+
+#if 0
+ } else if (hit->start_amb_length + hit->end_amb_length == 0 &&
+ best_hit->start_amb_length + best_hit->end_amb_length > 0) {
+ debug7(printf(" => %d loses by ambiguity\n",k));
+ return -1;
+ } else if (hit->start_amb_length + hit->end_amb_length > 0 &&
+ best_hit->start_amb_length + best_hit->end_amb_length == 0) {
+ debug7(printf(" => %d wins by ambiguity\n",k));
+ return +1;
#endif
} else if (hit->nindels > best_hit->nindels) {
@@ -8844,28 +9909,19 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
debug7(printf(" => %d wins because not distant splice\n",k));
return +1;
- } else if (hit->end_ambiguous_p == true && best_hit->end_ambiguous_p == false) {
- debug7(printf(" => %d loses because end is ambiguous\n",k));
- return -1;
- } else if (hit->end_ambiguous_p == false && best_hit->end_ambiguous_p == true) {
- debug7(printf(" => %d wins because end is not ambiguous\n",k));
- return +1;
-
} else if (finalp == false) {
debug7(printf(" => indistinguishable\n"));
return 0;
} else {
- if (hit->hittype == SPLICE && best_hit->hittype == SPLICE) {
- prob1 = Substring_chimera_prob(hit->substring_donor) + Substring_chimera_prob(hit->substring_acceptor);
- prob2 = Substring_chimera_prob(best_hit->substring_donor) + Substring_chimera_prob(best_hit->substring_acceptor);
- if (prob1 < prob2) {
- debug7(printf(" => %d loses by splice prob %f vs %f\n",k,prob1,prob2));
- return -1;
- } else if (prob1 > prob2) {
- debug7(printf(" => %d wins by splice prob %f vs %f\n",k,prob1,prob2));
- return +1;
- }
+ prob1 = Stage3end_prob(hit);
+ prob2 = Stage3end_prob(best_hit);
+ if (prob1 < prob2) {
+ debug7(printf(" => %d loses by splice prob %f vs %f\n",k,prob1,prob2));
+ return -1;
+ } else if (prob1 > prob2) {
+ debug7(printf(" => %d wins by splice prob %f vs %f\n",k,prob1,prob2));
+ return +1;
}
if (hit->genomiclength > best_hit->genomiclength) {
@@ -9489,10 +10545,10 @@ Stage3end_resolve_multimapping (List_T hits) {
static void
-print_alignment_info (FILE *fp, int nblocks, int score, int mapq_score, bool sarrayp) {
- fprintf(fp,"segs:%d,align_score:%d,mapq:%d",nblocks,score,mapq_score);
+print_alignment_info (Filestring_T fp, int nblocks, int score, int mapq_score, bool sarrayp) {
+ FPRINTF(fp,"segs:%d,align_score:%d,mapq:%d",nblocks,score,mapq_score);
if (sarrayp == true) {
- fprintf(fp,",method:sa");
+ FPRINTF(fp,",method:sa");
}
return;
}
@@ -9571,7 +10627,7 @@ unpaired_type_text (T hit5, T hit3) {
/* Has a copy in pair.c */
static void
-print_pair_info (FILE *fp, T hit5, T hit3, int insertlength, int pairscore,
+print_pair_info (Filestring_T fp, T hit5, T hit3, int insertlength, int pairscore,
Pairtype_T pairtype) {
assert(hit5->effective_chrnum == hit3->effective_chrnum); /* Same chromosomes */
@@ -9581,14 +10637,14 @@ print_pair_info (FILE *fp, T hit5, T hit3, int insertlength, int pairscore,
assert(hit5->plusp == hit3->plusp); /* Same direction */
#endif
- fprintf(fp,"pair_score:%d",pairscore);
- fprintf(fp,",insert_length:%d",insertlength);
+ FPRINTF(fp,"pair_score:%d",pairscore);
+ FPRINTF(fp,",insert_length:%d",insertlength);
switch (pairtype) {
case CONCORDANT: break;
- case PAIRED_SCRAMBLE: fprintf(fp,",pairtype:scramble"); break;
- case PAIRED_INVERSION: fprintf(fp,",pairtype:inversion"); break;
- case PAIRED_TOOLONG: fprintf(fp,",pairtype:toolong"); break;
+ case PAIRED_SCRAMBLE: FPRINTF(fp,",pairtype:scramble"); break;
+ case PAIRED_INVERSION: FPRINTF(fp,",pairtype:inversion"); break;
+ case PAIRED_TOOLONG: FPRINTF(fp,",pairtype:toolong"); break;
case CONCORDANT_TRANSLOCATIONS: break;
case CONCORDANT_TERMINAL: break;
case PAIRED_UNSPECIFIED: abort();
@@ -9599,510 +10655,215 @@ print_pair_info (FILE *fp, T hit5, T hit3, int insertlength, int pairscore,
return;
}
-static void
-print_single (FILE *fp, T this, int score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
- Shortread_T headerseq, char *acc_suffix, bool invertp, T hit5, T hit3, int insertlength,
- int pairscore, Pairtype_T pairtype, int mapq_score) {
- char *chr;
- bool allocp;
-
- chr = Univ_IIT_label(chromosome_iit,this->chrnum,&allocp);
-
- if (print_m8_p) {
- Substring_print_m8(fp,this->substring1,headerseq,acc_suffix,chr,invertp);
- } else {
- fprintf(fp," ");
- Substring_print_single(fp,this->substring1,queryseq,chr,invertp);
-
- /* Alignment info */
- fprintf(fp,"\t");
- print_alignment_info(fp,/*nblocks*/1,score,mapq_score,this->sarrayp);
-
- /* Pairing info */
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t");
- print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
-
- fprintf(fp,"\n");
- }
-
- if (allocp == true) {
- FREE(chr);
- }
-
- return;
-}
-
static void
-print_insertion (FILE *fp, T this, int score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
- Shortread_T headerseq, char *acc_suffix, bool invertp, T hit5, T hit3, int insertlength,
- int pairscore, Pairtype_T pairtype, int mapq_score) {
- char *chr;
- bool allocp;
-
- chr = Univ_IIT_label(chromosome_iit,this->chrnum,&allocp);
-
- if (print_m8_p) {
- Substring_print_m8(fp,this->substring1,headerseq,acc_suffix,chr,invertp);
- Substring_print_m8(fp,this->substring2,headerseq,acc_suffix,chr,invertp);
+print_substrings (Filestring_T fp, T this, int score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
+ Shortread_T headerseq, char *acc_suffix, bool invertp, T hit5, T hit3, int insertlength,
+ int pairscore, Pairtype_T pairtype, int mapq_score) {
+ char *single_chr, *chr;
+ bool allocp, alloc1p, pairinfo_printed_p = false;
+ List_T substrings, junctions, p, q;
+ Substring_T substring;
+ Junction_T pre_junction, post_junction;
+ int nblocks;
+ if (this->chrnum == 0) {
+ single_chr = (char *) NULL;
} else {
- fprintf(fp," ");
- Substring_print_insertion_1(fp,this->substring1,this->substring2,this->nindels,
- queryseq,chr,invertp);
- /* Alignment info */
- fprintf(fp,"\t");
- print_alignment_info(fp,/*nblocks*/2,score,mapq_score,this->sarrayp);
-
- /* Pairing info */
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t");
- print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
-
- fprintf(fp,"\n");
-
-
- fprintf(fp,",");
- Substring_print_insertion_2(fp,this->substring1,this->substring2,this->nindels,
- queryseq,chr,invertp);
- fprintf(fp,"\n");
+ single_chr = Univ_IIT_label(chromosome_iit,this->chrnum,&alloc1p);
}
-
- if (allocp == true) {
- FREE(chr);
+ if (invertp == true) {
+ substrings = this->substrings_Nto1;
+ junctions = this->junctions_Nto1;
+ } else {
+ substrings = this->substrings_1toN;
+ junctions = this->junctions_1toN;
}
- return;
-}
-
-static void
-print_deletion (FILE *fp, T this, int score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
- Shortread_T headerseq, char *acc_suffix, bool invertp, T hit5, T hit3, int insertlength,
- int pairscore, Pairtype_T pairtype, int mapq_score) {
- char *chr;
- bool allocp;
-
- chr = Univ_IIT_label(chromosome_iit,this->chrnum,&allocp);
-
if (print_m8_p) {
- Substring_print_m8(fp,this->substring1,headerseq,acc_suffix,chr,invertp);
- Substring_print_m8(fp,this->substring2,headerseq,acc_suffix,chr,invertp);
-
- } else {
- fprintf(fp," ");
- Substring_print_deletion_1(fp,this->substring1,this->substring2,this->nindels,this->deletion,
- queryseq,chr,invertp);
- /* Alignment info */
- fprintf(fp,"\t");
- print_alignment_info(fp,/*nblocks*/2,score,mapq_score,this->sarrayp);
-
- /* Pairing info */
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t");
- print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
+ for (p = substrings; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_ambiguous_p(substring) == true) {
+ /* Skip */
+ } else {
+ if ((chr = single_chr) == NULL) {
+ chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(substring),&allocp);
+ }
+ Substring_print_m8(fp,substring,headerseq,acc_suffix,chr,invertp);
+ if (single_chr == NULL && allocp == true) {
+ FREE(chr);
+ }
+ }
}
- fprintf(fp,"\n");
-
- fprintf(fp,",");
- Substring_print_deletion_2(fp,this->substring1,this->substring2,this->nindels,
- queryseq,chr,invertp);
- fprintf(fp,"\n");
- }
-
- if (allocp == true) {
- FREE(chr);
- }
-}
-
-
-static void
-print_splice (FILE *fp, T chimera, int score,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq, Shortread_T headerseq,
- char *acc_suffix, bool invertp, T hit5, T hit3, int insertlength, int pairscore,
- Pairtype_T pairtype, int mapq_score) {
- Substring_T donor, acceptor;
- Chrnum_T chrnum;
- char *chr;
- bool allocp;
-
- if (chimera->hittype == HALFSPLICE_DONOR) {
- donor = chimera->substring_donor;
- acceptor = (Substring_T) NULL;
- Substring_assign_donor_prob(donor);
-
- } else if (chimera->hittype == HALFSPLICE_ACCEPTOR) {
- acceptor = chimera->substring_acceptor;
- donor = (Substring_T) NULL;
- Substring_assign_acceptor_prob(acceptor);
-
} else {
- donor = chimera->substring_donor;
- acceptor = chimera->substring_acceptor;
- Substring_assign_donor_prob(donor);
- Substring_assign_acceptor_prob(acceptor);
- }
-
- if (print_m8_p) {
- if (donor == NULL) {
- chrnum = Substring_chrnum(acceptor);
- chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
- Substring_print_m8(fp,acceptor,headerseq,acc_suffix,chr,invertp);
- if (allocp) FREE(chr);
-
- } else if (acceptor == NULL) {
- chrnum = Substring_chrnum(donor);
- chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
- Substring_print_m8(fp,donor,headerseq,acc_suffix,chr,invertp);
- if (allocp) FREE(chr);
-
+ if ((nblocks = List_length(substrings)) == 1) {
+ post_junction = (Junction_T) NULL;
} else {
- chrnum = Substring_chrnum(donor);
- chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
- Substring_print_m8(fp,donor,headerseq,acc_suffix,chr,invertp);
- Substring_print_m8(fp,acceptor,headerseq,acc_suffix,chr,invertp);
- if (allocp) FREE(chr);
+ post_junction = (Junction_T) List_head(junctions);
}
-
- } else if (donor == NULL) {
- fprintf(fp," ");
- if (chimera->sensedir == SENSE_FORWARD) {
- Substring_print_acceptor(fp,acceptor,/*sensep*/true,invertp,queryseq,
- chromosome_iit,donor,chimera->distance);
- } else {
- Substring_print_acceptor(fp,acceptor,/*sensep*/false,invertp,queryseq,
- chromosome_iit,donor,chimera->distance);
+ substring = (Substring_T) List_head(substrings);
+ if (Substring_ambiguous_p(substring) == true) {
+ nblocks -= 1;
}
-
- /* Alignment info */
- fprintf(fp,"\t");
- print_alignment_info(fp,/*nblocks*/1,score,mapq_score,chimera->sarrayp);
-
- /* Pairing info */
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t");
- print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
+ substring = (Substring_T) List_last_value(substrings);
+ if (Substring_ambiguous_p(substring) == true) {
+ nblocks -= 1;
}
- fprintf(fp,"\n");
-
- } else if (acceptor == NULL) {
- fprintf(fp," ");
- if (chimera->sensedir == SENSE_FORWARD) {
- Substring_print_donor(fp,donor,/*sensep*/true,invertp,
- queryseq,chromosome_iit,acceptor,chimera->distance);
- } else {
- Substring_print_donor(fp,donor,/*sensep*/false,invertp,
- queryseq,chromosome_iit,acceptor,chimera->distance);
- }
-
- /* Alignment info */
- fprintf(fp,"\t");
- print_alignment_info(fp,/*nblocks*/1,score,mapq_score,chimera->sarrayp);
-
- /* Pairing info */
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t");
- print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
- fprintf(fp,"\n");
-
- } else if (chimera->sensedir == SENSE_FORWARD && invertp == false) {
- fprintf(fp," ");
- Substring_print_donor(fp,donor,/*sensep*/true,/*invertp*/false,
- queryseq,chromosome_iit,acceptor,chimera->distance);
- /* Alignment info */
- fprintf(fp,"\t");
- print_alignment_info(fp,/*nblocks*/2,score,mapq_score,chimera->sarrayp);
-
- /* Pairing info */
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t");
- print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,",");
- Substring_print_acceptor(fp,acceptor,/*sensep*/true,/*invertp*/false,queryseq,
- chromosome_iit,donor,chimera->distance);
- fprintf(fp,"\n");
-
- } else if (chimera->sensedir == SENSE_FORWARD && invertp == true) {
- fprintf(fp," ");
- Substring_print_acceptor(fp,acceptor,/*sensep*/true,/*invertp*/true,queryseq,
- chromosome_iit,donor,chimera->distance);
- /* Alignment info */
- fprintf(fp,"\t");
- print_alignment_info(fp,/*nblocks*/2,score,mapq_score,chimera->sarrayp);
-
- /* Pairing info */
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t");
- print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,",");
- Substring_print_donor(fp,donor,/*sensep*/true,/*invertp*/true,queryseq,
- chromosome_iit,acceptor,chimera->distance);
- fprintf(fp,"\n");
-
- } else if (chimera->sensedir == SENSE_ANTI && invertp == false) {
- fprintf(fp," ");
- Substring_print_acceptor(fp,acceptor,/*sensep*/false,/*invertp*/false,queryseq,
- chromosome_iit,donor,chimera->distance);
- /* Alignment info */
- fprintf(fp,"\t");
- print_alignment_info(fp,/*nblocks*/2,score,mapq_score,chimera->sarrayp);
-
- /* Pairing info */
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t");
- print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,",");
- Substring_print_donor(fp,donor,/*sensep*/false,/*invertp*/false,queryseq,
- chromosome_iit,acceptor,chimera->distance);
- fprintf(fp,"\n");
-
- } else if (chimera->sensedir == SENSE_ANTI && invertp == true) {
- fprintf(fp," ");
- Substring_print_donor(fp,donor,/*sensep*/false,/*invertp*/true,queryseq,
- chromosome_iit,acceptor,chimera->distance);
- /* Alignment info */
- fprintf(fp,"\t");
- print_alignment_info(fp,/*nblocks*/2,score,mapq_score,chimera->sarrayp);
-
- /* Pairing info */
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t");
- print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,",");
- Substring_print_acceptor(fp,acceptor,/*sensep*/false,/*invertp*/true,queryseq,
- chromosome_iit,donor,chimera->distance);
- fprintf(fp,"\n");
- }
-
- return;
-}
-
-
-static void
-print_shortexon (FILE *fp, T chimera, int score,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq, Shortread_T headerseq,
- bool invertp, T hit5, T hit3, int insertlength, int pairscore,
- Pairtype_T pairtype, int mapq_score) {
- Substring_T donor, acceptor, shortexon;
- Chrpos_T distance1, distance2;
- bool firstp = true;
- int nblocks = 1;
-
- shortexon = chimera->substring1;
- Substring_assign_shortexon_prob(shortexon);
- if ((donor = chimera->substringD) != NULL) {
- Substring_assign_donor_prob(donor);
- nblocks++;
- }
- if ((acceptor = chimera->substringA) != NULL) {
- Substring_assign_acceptor_prob(acceptor);
- nblocks++;
- }
- if (chimera->sensedir == SENSE_FORWARD && invertp == false) {
- distance1 = chimera->shortexonA_distance;
- distance2 = chimera->shortexonD_distance;
-
- if (donor != NULL) {
- fprintf(fp," ");
- Substring_print_donor(fp,donor,/*sensep*/true,/*invertp*/false,
- queryseq,chromosome_iit,acceptor,distance1);
- fprintf(fp,"\t"); print_alignment_info(fp,nblocks,score,mapq_score,chimera->sarrayp);
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t"); print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
- firstp = false;
- fprintf(fp,"\n");
- }
-
- if (firstp == true) { fprintf(fp," "); } else { fprintf(fp,","); }
- Substring_print_shortexon(fp,shortexon,/*sensep*/true,/*invertp*/false,queryseq,
- chromosome_iit,distance1,distance2);
- if (firstp == true) {
- fprintf(fp,"\t"); print_alignment_info(fp,nblocks,score,mapq_score,chimera->sarrayp);
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t"); print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
+ /* First line */
+ substring = (Substring_T) List_head(substrings);
+ if (Substring_ambiguous_p(substring) == true) {
+ /* Skip */
+ } else {
+ if ((chr = single_chr) == NULL) {
+ chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(substring),&allocp);
}
- }
- fprintf(fp,"\n");
-
- if (acceptor != NULL) {
- fprintf(fp,",");
- Substring_print_acceptor(fp,acceptor,/*sensep*/true,/*invertp*/false,queryseq,
- chromosome_iit,donor,distance2);
- fprintf(fp,"\n");
- }
-
- } else if (chimera->sensedir == SENSE_FORWARD && invertp == true) {
- distance1 = chimera->shortexonD_distance;
- distance2 = chimera->shortexonA_distance;
-
- if (acceptor != NULL) {
- fprintf(fp," ");
- Substring_print_acceptor(fp,acceptor,/*sensep*/true,/*invertp*/true,queryseq,
- chromosome_iit,donor,distance1);
- fprintf(fp,"\t"); print_alignment_info(fp,nblocks,score,mapq_score,chimera->sarrayp);
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t"); print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
+ FPRINTF(fp," ");
+ Substring_print_alignment(fp,/*pre_junction*/NULL,substring,post_junction,queryseq,genome,chr,invertp);
+ if (single_chr == NULL && allocp == true) {
+ FREE(chr);
}
- firstp = false;
- fprintf(fp,"\n");
- }
- if (firstp == true) { fprintf(fp," "); } else { fprintf(fp,","); }
- Substring_print_shortexon(fp,shortexon,/*sensep*/true,/*invertp*/true,queryseq,
- chromosome_iit,distance1,distance2);
- if (firstp == true) {
- fprintf(fp,"\t"); print_alignment_info(fp,nblocks,score,mapq_score,chimera->sarrayp);
+ /* Alignment info */
+ FPRINTF(fp,"\t");
+ print_alignment_info(fp,nblocks,score,mapq_score,this->sarrayp);
+
+ /* Pairing info */
if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t"); print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
+ FPRINTF(fp,"\t");
+ print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
}
- }
- fprintf(fp,"\n");
+ pairinfo_printed_p = true;
- if (donor != NULL) {
- fprintf(fp,",");
- Substring_print_donor(fp,donor,/*sensep*/true,/*invertp*/true,queryseq,
- chromosome_iit,acceptor,distance2);
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
}
- } else if (chimera->sensedir == SENSE_ANTI && invertp == false) {
- distance1 = chimera->shortexonD_distance;
- distance2 = chimera->shortexonA_distance;
+ if ((p = List_next(substrings)) == NULL) {
+ /* Done */
+ } else {
+ /* Middle lines */
+ for (q = List_next(junctions); q != NULL; p = List_next(p), q = List_next(q)) {
+ pre_junction = post_junction;
+ post_junction = List_head(q);
- if (acceptor != NULL) {
- fprintf(fp," ");
- Substring_print_acceptor(fp,acceptor,/*sensep*/false,/*invertp*/false,queryseq,
- chromosome_iit,donor,distance1);
- fprintf(fp,"\t"); print_alignment_info(fp,nblocks,score,mapq_score,chimera->sarrayp);
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t"); print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
- firstp = false;
- fprintf(fp,"\n");
- }
+ substring = (Substring_T) List_head(p);
+ if (Substring_ambiguous_p(substring) == true) {
+ /* Skip */
+ } else {
+ if (pairinfo_printed_p == true) {
+ FPRINTF(fp,",");
+ } else {
+ FPRINTF(fp," ");
+ }
+ if ((chr = single_chr) == NULL) {
+ chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(substring),&allocp);
+ }
+ Substring_print_alignment(fp,pre_junction,substring,post_junction,queryseq,genome,chr,invertp);
+ if (single_chr == NULL && allocp == true) {
+ FREE(chr);
+ }
- if (firstp == true) { fprintf(fp," "); } else { fprintf(fp,","); }
- Substring_print_shortexon(fp,shortexon,/*sensep*/false,/*invertp*/false,queryseq,
- chromosome_iit,distance1,distance2);
- if (firstp == true) {
- fprintf(fp,"\t"); print_alignment_info(fp,nblocks,score,mapq_score,chimera->sarrayp);
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t"); print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
+ if (pairinfo_printed_p == false) {
+ /* Alignment info */
+ FPRINTF(fp,"\t");
+ print_alignment_info(fp,nblocks,score,mapq_score,this->sarrayp);
+
+ /* Pairing info */
+ if (hit5 != NULL && hit3 != NULL) {
+ FPRINTF(fp,"\t");
+ print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
+ }
+ pairinfo_printed_p = true;
+ }
+ FPRINTF(fp,"\n");
+ }
}
- }
- fprintf(fp,"\n");
-
- if (donor != NULL) {
- fprintf(fp,",");
- Substring_print_donor(fp,donor,/*sensep*/false,/*invertp*/false,queryseq,
- chromosome_iit,acceptor,distance2);
- fprintf(fp,"\n");
- }
- } else if (chimera->sensedir == SENSE_ANTI && invertp == true) {
- distance2 = chimera->shortexonD_distance;
- distance1 = chimera->shortexonA_distance;
+ /* Last line */
+ pre_junction = post_junction;
- if (donor != NULL) {
- fprintf(fp," ");
- Substring_print_donor(fp,donor,/*sensep*/false,/*invertp*/true,queryseq,
- chromosome_iit,acceptor,distance1);
- fprintf(fp,"\t"); print_alignment_info(fp,nblocks,score,mapq_score,chimera->sarrayp);
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t"); print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
- }
- firstp = false;
- fprintf(fp,"\n");
- }
+ substring = (Substring_T) List_head(p);
+ if (Substring_ambiguous_p(substring) == true) {
+ /* Skip */
+ } else {
+ if (pairinfo_printed_p == true) {
+ FPRINTF(fp,",");
+ } else {
+ FPRINTF(fp," ");
+ }
+ if ((chr = single_chr) == NULL) {
+ chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(substring),&allocp);
+ }
+ Substring_print_alignment(fp,pre_junction,substring,/*post_junction*/NULL,queryseq,genome,chr,invertp);
+ if (single_chr == NULL && allocp == true) {
+ FREE(chr);
+ }
- if (firstp == true) { fprintf(fp," "); } else { fprintf(fp,","); }
- Substring_print_shortexon(fp,shortexon,/*sensep*/false,/*invertp*/true,queryseq,
- chromosome_iit,distance1,distance2);
- if (firstp == true) {
- fprintf(fp,"\t"); print_alignment_info(fp,nblocks,score,mapq_score,chimera->sarrayp);
- if (hit5 != NULL && hit3 != NULL) {
- fprintf(fp,"\t"); print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
+ if (pairinfo_printed_p == false) {
+ /* Alignment info */
+ FPRINTF(fp,"\t");
+ print_alignment_info(fp,nblocks,score,mapq_score,this->sarrayp);
+
+ /* Pairing info */
+ if (hit5 != NULL && hit3 != NULL) {
+ FPRINTF(fp,"\t");
+ print_pair_info(fp,hit5,hit3,insertlength,pairscore,pairtype);
+ }
+ /* pairinfo_printed_p = true; */
+ }
+ FPRINTF(fp,"\n");
}
}
- fprintf(fp,"\n");
-
- if (acceptor != NULL) {
- fprintf(fp,",");
- Substring_print_acceptor(fp,acceptor,/*sensep*/false,/*invertp*/true,queryseq,
- chromosome_iit,donor,distance2);
- fprintf(fp,"\n");
- }
}
- return;
+ if (alloc1p == true) {
+ FREE(single_chr);
+ }
}
/* May substitute paired-end loglik for single-end loglik */
void
-Stage3end_print (FILE *fp, T this, int score,
+Stage3end_print (Filestring_T fp, T this, int score,
Univ_IIT_T chromosome_iit, Shortread_T queryseq, Shortread_T headerseq,
char *acc_suffix, bool invertp, T hit5, T hit3, int insertlength,
int pairscore, Pairtype_T pairtype, int mapq_score) {
+ bool pairedp;
- if (this->hittype == EXACT || this->hittype == SUB || this->hittype == TERMINAL) {
- print_single(fp,this,score,chromosome_iit,queryseq,headerseq,acc_suffix,invertp,
- hit5,hit3,insertlength,pairscore,pairtype,mapq_score);
- } else if (this->hittype == INSERTION) {
- print_insertion(fp,this,score,chromosome_iit,queryseq,headerseq,acc_suffix,invertp,
- hit5,hit3,insertlength,pairscore,pairtype,mapq_score);
- } else if (this->hittype == DELETION) {
- print_deletion(fp,this,score,chromosome_iit,queryseq,headerseq,acc_suffix,invertp,
- hit5,hit3,insertlength,pairscore,pairtype,mapq_score);
- } else if (this->hittype == HALFSPLICE_DONOR || this->hittype == HALFSPLICE_ACCEPTOR ||
- this->hittype == SPLICE || this->hittype == SAMECHR_SPLICE || this->hittype == TRANSLOC_SPLICE) {
- print_splice(fp,this,score,
- chromosome_iit,queryseq,headerseq,acc_suffix,invertp,hit5,hit3,insertlength,
- pairscore,pairtype,mapq_score);
- } else if (this->hittype == ONE_THIRD_SHORTEXON || this->hittype == TWO_THIRDS_SHORTEXON || this->hittype == SHORTEXON) {
- print_shortexon(fp,this,score,
- chromosome_iit,queryseq,headerseq,invertp,hit5,hit3,insertlength,
- pairscore,pairtype,mapq_score);
- } else if (this->hittype == GMAP) {
+ if (this->hittype == GMAP) {
if (print_m8_p) {
Pair_print_m8(fp,this->pairarray,this->npairs,/*invertedp*/false,
this->chrnum,queryseq,headerseq,acc_suffix,chromosome_iit);
} else if (Shortread_invertedp(queryseq) == false) {
+ if (pairtype == UNPAIRED) {
+ pairedp = false;
+ } else {
+ pairedp = true;
+ }
Substring_print_gmap(fp,this->pairarray,this->npairs,this->nsegments,/*invertedp*/false,
this->gmap_start_endtype,this->gmap_end_endtype,
this->chrnum,this->chroffset,this->chrhigh,Shortread_fulllength(queryseq),
this->plusp,this->gmap_cdna_direction,this->score,insertlength,pairscore,mapq_score,
- chromosome_iit);
+ chromosome_iit,pairedp,this->gmap_source);
} else {
+ if (pairtype == UNPAIRED) {
+ pairedp = false;
+ } else {
+ pairedp = true;
+ }
Substring_print_gmap(fp,this->pairarray,this->npairs,this->nsegments,/*invertedp*/true,
this->gmap_end_endtype,this->gmap_start_endtype,
this->chrnum,this->chroffset,this->chrhigh,Shortread_fulllength(queryseq),
this->plusp,this->gmap_cdna_direction,this->score,insertlength,pairscore,mapq_score,
- chromosome_iit);
+ chromosome_iit,pairedp,this->gmap_source);
}
} else {
- abort();
+ print_substrings(fp,this,score,chromosome_iit,queryseq,headerseq,acc_suffix,invertp,
+ hit5,hit3,insertlength,pairscore,pairtype,mapq_score);
}
return;
@@ -10110,8 +10871,8 @@ Stage3end_print (FILE *fp, T this, int score,
static void
-print_query_header (FILE *fp, char initchar, Shortread_T queryseq, bool invertp) {
- fprintf(fp,"%c",initchar);
+print_query_header (Filestring_T fp, char initchar, Shortread_T queryseq, bool invertp) {
+ FPRINTF(fp,"%c",initchar);
if (invertp == false) {
Shortread_print_oneline(fp,queryseq);
} else {
@@ -10124,15 +10885,15 @@ print_query_header (FILE *fp, char initchar, Shortread_T queryseq, bool invertp)
static void
-print_barcode_and_quality (FILE *fp, Shortread_T queryseq, bool invertp, int quality_shift) {
+print_barcode_and_quality (Filestring_T fp, Shortread_T queryseq, bool invertp, int quality_shift) {
char *barcode;
if ((barcode = Shortread_barcode(queryseq)) != NULL) {
- fprintf(fp,"\tbarcode:%s",barcode);
+ FPRINTF(fp,"\tbarcode:%s",barcode);
}
if (Shortread_quality_string(queryseq) != NULL) {
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
if (invertp == false) {
Shortread_print_quality(fp,queryseq,/*hardclip_low*/0,/*hardclip_high*/0,
quality_shift,/*show_chopped_p*/true);
@@ -10146,8 +10907,9 @@ print_barcode_and_quality (FILE *fp, Shortread_T queryseq, bool invertp, int qua
}
-static void
-print_one_paired_end (Result_T result, Resulttype_T resulttype,
+void
+Stage3pair_print_end (Filestring_T fp, Filestring_T fp_failedinput,
+ Result_T result, Resulttype_T resulttype,
char initchar, bool firstp, Univ_IIT_T chromosome_iit,
Shortread_T queryseq, Shortread_T headerseq1, Shortread_T headerseq2,
int maxpaths, bool quiet_if_excessive_p,
@@ -10155,21 +10917,22 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
Stage3pair_T *stage3pairarray, stage3pair;
T *stage3array, *stage3array_mate, this, hit5, hit3;
int npaths, npaths_mate, pathnum, first_absmq, second_absmq;
- bool outputp, translocationp;
- FILE *fp;
+ bool excessivep, translocationp;
+
if (resulttype == PAIREDEND_NOMAPPING) {
if (print_m8_p == false) {
- /* If failedinput_root != NULL, then this case is handled by calling procedure */
- print_query_header(fp_nomapping,initchar,queryseq,invertp);
- fprintf(fp_nomapping,"\t0 %s",UNPAIRED_TEXT);
+ Filestring_set_split_output(fp,OUTPUT_NM);
+ print_query_header(fp,initchar,queryseq,invertp);
+ FPRINTF(fp,"\t0 %s",UNPAIRED_TEXT);
- print_barcode_and_quality(fp_nomapping,queryseq,invertp,quality_shift);
+ print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
- fprintf(fp_nomapping,"\t");
- Shortread_print_header(fp_nomapping,headerseq1,headerseq2);
- fprintf(fp_nomapping,"\n");
+ FPRINTF(fp,"\t");
+ Shortread_print_header(fp,headerseq1,headerseq2);
+ FPRINTF(fp,"\n");
}
+ /* If failedinput_root != NULL, then this case is handled by calling procedure */
} else if (resulttype == CONCORDANT_UNIQ) {
stage3pairarray = (Stage3pair_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
@@ -10178,25 +10941,22 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
hit3 = stage3pair->hit3;
if (stage3pair->circularp == true) {
- fp = fp_concordant_circular;
+ Filestring_set_split_output(fp,OUTPUT_CC);
} else {
- fp = fp_concordant_uniq;
+ Filestring_set_split_output(fp,OUTPUT_CU);
}
if (print_m8_p == false) {
print_query_header(fp,initchar,queryseq,invertp);
- fprintf(fp,"\t1 %s",CONCORDANT_TEXT);
+ FPRINTF(fp,"\t1 %s",CONCORDANT_TEXT);
print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
Shortread_print_header(fp,headerseq1,headerseq2);
}
if (firstp == true) {
-#if 0
- Stage3pair_eval(stage3pairarray,/*npaths*/1,maxpaths,queryseq,queryseq_mate);
-#endif
Stage3end_print(fp,hit5,hit5->score,
chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/1",
invertp,hit5,hit3,stage3pair->insertlength,
@@ -10209,66 +10969,57 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
}
if (print_m8_p == false) {
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
}
} else if (resulttype == CONCORDANT_TRANSLOC) {
+ Filestring_set_split_output(fp,OUTPUT_CT);
stage3pairarray = (Stage3pair_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
if (quiet_if_excessive_p && npaths > maxpaths) {
if (print_m8_p == false) {
/* No xs category for transloc, so ignore quiet-if-excessive_p */
- print_query_header(fp_concordant_transloc,initchar,queryseq,invertp);
- fprintf(fp_concordant_transloc,"\t%d %s",npaths,CONCORDANT_TEXT);
- fprintf(fp_concordant_transloc," (transloc)");
+ print_query_header(fp,initchar,queryseq,invertp);
+ FPRINTF(fp,"\t%d %s",npaths,CONCORDANT_TEXT);
+ FPRINTF(fp," (transloc)");
- print_barcode_and_quality(fp_concordant_transloc,queryseq,invertp,quality_shift);
+ print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
- fprintf(fp_concordant_transloc,"\t");
- Shortread_print_header(fp_concordant_transloc,headerseq1,headerseq2);
+ FPRINTF(fp,"\t");
+ Shortread_print_header(fp,headerseq1,headerseq2);
/* No further output */
- fprintf(fp_concordant_transloc,"\n");
+ FPRINTF(fp,"\n");
+ }
- if (failedinput_root != NULL) {
- if (fastq_format_p == true) {
- Shortread_print_query_singleend_fastq(firstp == true ? fp_failedinput_1 : fp_failedinput_2,queryseq,headerseq1);
- } else {
- Shortread_print_query_singleend_fasta(firstp == true ? fp_failedinput_1 : fp_failedinput_2,queryseq,headerseq1);
- }
- }
+ if (failedinput_root != NULL) {
+ Shortread_print_query_singleend(fp_failedinput,queryseq,headerseq1);
}
} else {
if (print_m8_p == false) {
- print_query_header(fp_concordant_transloc,initchar,queryseq,invertp);
- fprintf(fp_concordant_transloc,"\t%d %s",npaths,CONCORDANT_TEXT);
- fprintf(fp_concordant_transloc," (transloc)");
+ print_query_header(fp,initchar,queryseq,invertp);
+ FPRINTF(fp,"\t%d %s",npaths,CONCORDANT_TEXT);
+ FPRINTF(fp," (transloc)");
- print_barcode_and_quality(fp_concordant_transloc,queryseq,invertp,quality_shift);
+ print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
- fprintf(fp_concordant_transloc,"\t");
- Shortread_print_header(fp_concordant_transloc,headerseq1,headerseq2);
+ FPRINTF(fp,"\t");
+ Shortread_print_header(fp,headerseq1,headerseq2);
}
-#if 0
- if (firstp == true) {
- Stage3pair_eval(stage3pairarray,npaths,maxpaths,queryseq,queryseq_mate);
- }
-#endif
-
for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths; pathnum++) {
stage3pair = stage3pairarray[pathnum-1];
hit5 = stage3pair->hit5;
hit3 = stage3pair->hit3;
if (firstp == true) {
- Stage3end_print(fp_concordant_transloc,hit5,hit5->score,
+ Stage3end_print(fp,hit5,hit5->score,
chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/1",
invertp,hit5,hit3,stage3pair->insertlength,
stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
} else {
- Stage3end_print(fp_concordant_transloc,hit3,hit3->score,
+ Stage3end_print(fp,hit3,hit3->score,
chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/2",
invertp,hit5,hit3,stage3pair->insertlength,
stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
@@ -10276,7 +11027,7 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
}
if (print_m8_p == false) {
- fprintf(fp_concordant_transloc,"\n");
+ FPRINTF(fp,"\n");
}
}
@@ -10285,43 +11036,35 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
stage3pairarray = (Stage3pair_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
if (quiet_if_excessive_p && npaths > maxpaths) {
+ Filestring_set_split_output(fp,OUTPUT_CX);
if (print_m8_p == false) {
- print_query_header(fp_concordant_mult_xs_1,initchar,queryseq,invertp);
- fprintf(fp_concordant_mult_xs_1,"\t%d %s",npaths,CONCORDANT_TEXT);
+ print_query_header(fp,initchar,queryseq,invertp);
+ FPRINTF(fp,"\t%d %s",npaths,CONCORDANT_TEXT);
- print_barcode_and_quality(fp_concordant_mult_xs_1,queryseq,invertp,quality_shift);
+ print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
- fprintf(fp_concordant_mult_xs_1,"\t");
- Shortread_print_header(fp_concordant_mult_xs_1,headerseq1,headerseq2);
+ FPRINTF(fp,"\t");
+ Shortread_print_header(fp,headerseq1,headerseq2);
/* No further output */
- fprintf(fp_concordant_mult_xs_1,"\n");
+ FPRINTF(fp,"\n");
if (failedinput_root != NULL) {
- if (fastq_format_p == true) {
- Shortread_print_query_singleend_fastq(firstp == true ? fp_failedinput_1 : fp_failedinput_2,queryseq,headerseq1);
- } else {
- Shortread_print_query_singleend_fasta(firstp == true ? fp_failedinput_1 : fp_failedinput_2,queryseq,headerseq1);
- }
+ Shortread_print_query_singleend(fp_failedinput,queryseq,headerseq1);
}
}
} else {
+ Filestring_set_split_output(fp,OUTPUT_CM);
if (print_m8_p == false) {
- print_query_header(fp_concordant_mult,initchar,queryseq,invertp);
- fprintf(fp_concordant_mult,"\t%d %s",npaths,CONCORDANT_TEXT);
-
- print_barcode_and_quality(fp_concordant_mult,queryseq,invertp,quality_shift);
-
- fprintf(fp_concordant_mult,"\t");
- Shortread_print_header(fp_concordant_mult,headerseq1,headerseq2);
- }
+ print_query_header(fp,initchar,queryseq,invertp);
+ FPRINTF(fp,"\t%d %s",npaths,CONCORDANT_TEXT);
+
+ print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
-#if 0
- if (firstp == true) {
- Stage3pair_eval(stage3pairarray,npaths,maxpaths,queryseq,queryseq_mate);
+ FPRINTF(fp,"\t");
+ Shortread_print_header(fp,headerseq1,headerseq2);
}
-#endif
for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths; pathnum++) {
stage3pair = stage3pairarray[pathnum-1];
@@ -10329,12 +11072,12 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
hit3 = stage3pair->hit3;
if (firstp == true) {
- Stage3end_print(fp_concordant_mult,hit5,hit5->score,
+ Stage3end_print(fp,hit5,hit5->score,
chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/1",
invertp,hit5,hit3,stage3pair->insertlength,
stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
} else {
- Stage3end_print(fp_concordant_mult,hit3,hit3->score,
+ Stage3end_print(fp,hit3,hit3->score,
chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/2",
invertp,hit5,hit3,stage3pair->insertlength,
stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
@@ -10342,7 +11085,7 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
}
if (print_m8_p == false) {
- fprintf(fp_concordant_mult,"\n");
+ FPRINTF(fp,"\n");
}
}
@@ -10352,13 +11095,13 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
stage3pair = stage3pairarray[0];
if (stage3pair->circularp == true) {
- fp = fp_paired_uniq_circular;
+ Filestring_set_split_output(fp,OUTPUT_PC);
} else if (stage3pair->pairtype == PAIRED_INVERSION) {
- fp = fp_paired_uniq_inv;
+ Filestring_set_split_output(fp,OUTPUT_PI);
} else if (stage3pair->pairtype == PAIRED_SCRAMBLE) {
- fp = fp_paired_uniq_scr;
+ Filestring_set_split_output(fp,OUTPUT_PS);
} else if (stage3pair->pairtype == PAIRED_TOOLONG) {
- fp = fp_paired_uniq_long;
+ Filestring_set_split_output(fp,OUTPUT_PL);
} else {
fprintf(stderr,"Unexpected pairtype %d\n",stage3pair->pairtype);
abort();
@@ -10366,11 +11109,11 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
if (print_m8_p == false) {
print_query_header(fp,initchar,queryseq,invertp);
- fprintf(fp,"\t1 %s",PAIRED_TEXT);
+ FPRINTF(fp,"\t1 %s",PAIRED_TEXT);
print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
Shortread_print_header(fp,headerseq1,headerseq2);
}
@@ -10378,9 +11121,6 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
hit3 = stage3pair->hit3;
if (firstp == true) {
-#if 0
- Stage3pair_eval(stage3pairarray,/*npaths*/1,maxpaths,queryseq,queryseq_mate);
-#endif
Stage3end_print(fp,hit5,hit5->score,
chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/1",
invertp,hit5,hit3,stage3pair->insertlength,
@@ -10393,50 +11133,42 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
}
if (print_m8_p == false) {
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
}
} else if (resulttype == PAIRED_MULT) {
stage3pairarray = (Stage3pair_T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
if (quiet_if_excessive_p && npaths > maxpaths) {
+ Filestring_set_split_output(fp,OUTPUT_PX);
if (print_m8_p == false) {
- print_query_header(fp_paired_mult_xs_1,initchar,queryseq,invertp);
- fprintf(fp_paired_mult_xs_1,"\t%d %s",npaths,PAIRED_TEXT);
+ print_query_header(fp,initchar,queryseq,invertp);
+ FPRINTF(fp,"\t%d %s",npaths,PAIRED_TEXT);
- print_barcode_and_quality(fp_paired_mult_xs_1,queryseq,invertp,quality_shift);
+ print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
- fprintf(fp_paired_mult_xs_1,"\t");
- Shortread_print_header(fp_paired_mult_xs_1,headerseq1,headerseq2);
+ FPRINTF(fp,"\t");
+ Shortread_print_header(fp,headerseq1,headerseq2);
/* No further output */
- fprintf(fp_paired_mult_xs_1,"\n");
+ FPRINTF(fp,"\n");
if (failedinput_root != NULL) {
- if (fastq_format_p == true) {
- Shortread_print_query_singleend_fastq(firstp == true ? fp_failedinput_1 : fp_failedinput_2,queryseq,headerseq1);
- } else {
- Shortread_print_query_singleend_fasta(firstp == true ? fp_failedinput_1 : fp_failedinput_2,queryseq,headerseq1);
- }
+ Shortread_print_query_singleend(fp_failedinput,queryseq,headerseq1);
}
}
} else {
+ Filestring_set_split_output(fp,OUTPUT_PM);
if (print_m8_p == false) {
- print_query_header(fp_paired_mult,initchar,queryseq,invertp);
- fprintf(fp_paired_mult,"\t%d %s",npaths,PAIRED_TEXT);
+ print_query_header(fp,initchar,queryseq,invertp);
+ FPRINTF(fp,"\t%d %s",npaths,PAIRED_TEXT);
- print_barcode_and_quality(fp_paired_mult,queryseq,invertp,quality_shift);
-
- fprintf(fp_paired_mult,"\t");
- Shortread_print_header(fp_paired_mult,headerseq1,headerseq2);
- }
+ print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
-#if 0
- if (firstp == true) {
- Stage3pair_eval(stage3pairarray,npaths,maxpaths,queryseq,queryseq_mate);
+ FPRINTF(fp,"\t");
+ Shortread_print_header(fp,headerseq1,headerseq2);
}
-#endif
for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths; pathnum++) {
stage3pair = stage3pairarray[pathnum-1];
@@ -10444,12 +11176,12 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
hit3 = stage3pair->hit3;
if (firstp == true) {
- Stage3end_print(fp_paired_mult,hit5,hit5->score,
+ Stage3end_print(fp,hit5,hit5->score,
chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/1",
invertp,hit5,hit3,stage3pair->insertlength,
stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
} else {
- Stage3end_print(fp_paired_mult,hit3,hit3->score,
+ Stage3end_print(fp,hit3,hit3->score,
chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/2",
invertp,hit5,hit3,stage3pair->insertlength,
stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
@@ -10457,7 +11189,7 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
}
if (print_m8_p == false) {
- fprintf(fp_paired_mult,"\n");
+ FPRINTF(fp,"\n");
}
}
@@ -10474,44 +11206,48 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
stage3array = (T *) Result_array2(&npaths,&first_absmq,&second_absmq,result);
}
- outputp = true;
+ excessivep = false;
translocationp = false;
if (resulttype == HALFMAPPING_UNIQ) {
if (npaths > 0 && Stage3end_circularpos(stage3array[0]) > 0) {
- fp = fp_halfmapping_circular;
+ Filestring_set_split_output(fp,OUTPUT_HC);
} else if (npaths_mate > 0 && Stage3end_circularpos(stage3array_mate[0]) > 0) {
- fp = fp_halfmapping_circular;
+ Filestring_set_split_output(fp,OUTPUT_HC);
} else {
- fp = fp_halfmapping_uniq;
+ Filestring_set_split_output(fp,OUTPUT_HU);
}
} else if (resulttype == HALFMAPPING_TRANSLOC) {
- fp = fp_halfmapping_transloc;
+ Filestring_set_split_output(fp,OUTPUT_HT);
translocationp = true;
} else if (resulttype == HALFMAPPING_MULT) {
- fp = fp_halfmapping_mult;
if (quiet_if_excessive_p && npaths > maxpaths) {
- outputp = false;
+ Filestring_set_split_output(fp,OUTPUT_HX);
+ excessivep = true;
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_HM);
}
} else if (resulttype == UNPAIRED_UNIQ) {
if (npaths > 0 && Stage3end_circularpos(stage3array[0]) > 0) {
- fp = fp_unpaired_circular;
+ Filestring_set_split_output(fp,OUTPUT_UC);
} else if (npaths_mate > 0 && Stage3end_circularpos(stage3array_mate[0]) > 0) {
- fp = fp_unpaired_circular;
+ Filestring_set_split_output(fp,OUTPUT_UC);
} else {
- fp = fp_unpaired_uniq;
+ Filestring_set_split_output(fp,OUTPUT_UU);
}
} else if (resulttype == UNPAIRED_TRANSLOC) {
- fp = fp_unpaired_transloc;
+ Filestring_set_split_output(fp,OUTPUT_UT);
translocationp = true;
} else if (resulttype == UNPAIRED_MULT) {
- fp = fp_unpaired_mult;
if (quiet_if_excessive_p && npaths > maxpaths) {
- outputp = false;
+ Filestring_set_split_output(fp,OUTPUT_UX);
+ excessivep = true;
+ } else {
+ Filestring_set_split_output(fp,OUTPUT_UM);
}
} else {
@@ -10521,30 +11257,24 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
if (print_m8_p == false) {
print_query_header(fp,initchar,queryseq,invertp);
- fprintf(fp,"\t%d %s",npaths,UNPAIRED_TEXT);
+ FPRINTF(fp,"\t%d %s",npaths,UNPAIRED_TEXT);
if (translocationp == true) {
- fprintf(fp," (transloc)");
- }
-
-#if 0
- /* Print unpaired type for unpaired_uniq results */
- if (resulttype == UNPAIRED_UNIQ) {
- stage3array = (T *) Result_array(&npaths,&first_absmq,&second_absmq,result);
- hit5 = stage3array[0];
- stage3array = (T *) Result_array2(&npaths,&first_absmq,&second_absmq,result);
- hit3 = stage3array[0];
- fprintf(fp," (%s)",unpaired_type_text(hit5,hit3));
+ FPRINTF(fp," (transloc)");
}
-#endif
print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
Shortread_print_header(fp,headerseq1,headerseq2);
}
- if (outputp == true) {
- /* Stage3end_eval_and_sort(stage3array,npaths,maxpaths,queryseq); */
+ if (excessivep == true) {
+ /* No output */
+ if (failedinput_root != NULL) {
+ Shortread_print_query_singleend(fp_failedinput,queryseq,headerseq1);
+ }
+
+ } else {
if (firstp == true) {
for (pathnum = 1; pathnum <= npaths && pathnum <= maxpaths; pathnum++) {
this = stage3array[pathnum-1];
@@ -10565,7 +11295,7 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
}
if (print_m8_p == false) {
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
}
}
@@ -10574,70 +11304,6 @@ print_one_paired_end (Result_T result, Resulttype_T resulttype,
}
-/* Gets invert_first_p and invert_second_p from global above */
-
-void
-Stage3pair_print (Result_T result, Resulttype_T resulttype,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq1, Shortread_T queryseq2,
- int maxpaths, bool quiet_if_excessive_p, bool nofailsp, bool failsonlyp,
- bool fastq_format_p, int quality_shift) {
-
- debug1(printf("Stage3pair_print: resulttype is %s\n",Resulttype_string(resulttype)));
-
- if (resulttype == PAIREDEND_NOMAPPING) {
- if (nofailsp == true) {
- /* No output */
- debug1(printf(" nofailsp is true, so no output\n"));
-
- } else {
- debug1(printf(" printing failure output\n"));
-
- /* First end */
- print_one_paired_end(result,resulttype,'>',/*firstp*/true,chromosome_iit,
- /*queryseq*/queryseq1,/*headerseq1*/queryseq1,/*headerseq2*/queryseq2,
- maxpaths,quiet_if_excessive_p,invert_first_p,quality_shift);
-
- /* Second end */
- print_one_paired_end(result,resulttype,'<',/*firstp*/false,chromosome_iit,
- /*queryseq*/queryseq2,/*headerseq1*/queryseq1,/*headerseq2*/queryseq2,
- maxpaths,quiet_if_excessive_p,invert_second_p,quality_shift);
-
- if (failedinput_root != NULL) {
- if (fastq_format_p == true) {
- debug1(printf(" fails as input is true, so printing\n"));
- Shortread_print_query_pairedend_fastq(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2,
- invert_first_p,invert_second_p);
- } else {
- debug1(printf(" fails as input is true, so printing\n"));
- Shortread_print_query_pairedend_fasta(fp_failedinput_1,queryseq1,queryseq2,
- invert_first_p,invert_second_p);
- }
- }
- }
-
- } else {
- if (failsonlyp == true) {
- /* Unwanted success: skip */
- debug1(printf(" failsonlyp is true, so no output\n"));
-
- } else {
- /* First end */
- print_one_paired_end(result,resulttype,'>',/*firstp*/true,chromosome_iit,
- /*queryseq*/queryseq1,/*headerseq1*/queryseq1,/*headerseq2*/queryseq2,
- maxpaths,quiet_if_excessive_p,invert_first_p,quality_shift);
-
- /* Second end */
- print_one_paired_end(result,resulttype,'<',/*firstp*/false,chromosome_iit,
- /*queryseq*/queryseq2,/*headerseq1*/queryseq1,/*headerseq2*/queryseq2,
- maxpaths,quiet_if_excessive_p,invert_second_p,quality_shift);
- }
- }
-
- return;
-}
-
-
-
static List_T
strip_gaps_at_head (List_T pairs) {
Pair_T pair;
@@ -10671,110 +11337,67 @@ strip_gaps_at_tail (List_T pairs) {
}
+/* If use querylength_adj, ss.bug.4 fails. If use querylength, ss.bug.3 fails */
static List_T
Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
- int clipdir, int hardclip_low, int hardclip_high,
- bool first_read_p, int queryseq_offset) {
+ int hardclip_low, int hardclip_high, int queryseq_offset) {
Pair_T pair;
- List_T newpairs = NULL, p;
+ List_T newpairs = NULL, p, q;
Chrpos_T genomicpos1, genomicpos2;
+ Substring_T substring, prev_substring;
+ Junction_T junction;
+ Junctiontype_T type;
+ char *deletion_string;
- if (hit->hittype == EXACT || hit->hittype == SUB || hit->hittype == TERMINAL) {
- debug13(printf("Converting exact/sub\n"));
- return Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
-
- } else if (hit->hittype == INSERTION) {
- debug13(printf("Converting insertion\n"));
+ if (hit->hittype == TRANSLOC_SPLICE) {
+ /* Cannot handle translocations within a single GMAP alignment */
+ abort();
+ return NULL;
+
+ } else if (hit->hittype == GMAP) {
+ debug15(printf("Converting gmap to pairs\n"));
+ /* Use querylength here, but adj elsewhere */
+ return Pair_convert_array_to_pairs(pairs,hit->pairarray,hit->npairs,hit->plusp,hit->querylength,
+ hardclip_low,hardclip_high,queryseq_offset);
+ } else {
+ p = hit->substrings_1toN;
+ prev_substring = (Substring_T) List_head(p);
+ pairs = Substring_convert_to_pairs(pairs,prev_substring,hit->querylength,
+ queryseq,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_add_insertion(pairs,hit->substring1,hit->substring2,/*insertionlength*/hit->nindels,queryseq,
- hardclip_low,hardclip_high,queryseq_offset);
- if (hit->plusp == true) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- } else {
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- }
- return pairs;
-
- } else if (hit->hittype == DELETION) {
- debug13(printf("Converting deletion\n"));
-
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_add_deletion(pairs,hit->substring1,hit->substring2,/*deletion*/hit->deletion,/*deletionlength*/hit->nindels,
- hardclip_low,hardclip_high,queryseq_offset);
- if (hit->plusp == true) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- } else {
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- }
- return pairs;
-
- } else if (hit->hittype == HALFSPLICE_DONOR) {
- debug13(printf("Converting halfsplice_donor\n"));
- return Substring_convert_to_pairs(pairs,hit->substring_donor,queryseq,hardclip_low,hardclip_high,queryseq_offset);
-
- } else if (hit->hittype == HALFSPLICE_ACCEPTOR) {
- debug13(printf("Converting halfsplice_acceptor\n"));
- return Substring_convert_to_pairs(pairs,hit->substring_acceptor,queryseq,hardclip_low,hardclip_high,queryseq_offset);
-
- } else if (hit->hittype == SPLICE || hit->hittype == SAMECHR_SPLICE) {
- debug13(printf("Converting splice\n"));
-
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- return pairs;
-
- } else if (hit->hittype == TRANSLOC_SPLICE) {
- /* Cannot handle translocations within a single GMAP alignment */
- abort();
- return NULL;
-
- } else if (hit->hittype == ONE_THIRD_SHORTEXON) {
- debug13(printf("Converting 1/3 shortexon\n"));
-
- return Substring_convert_to_pairs(pairs,/*shortexon*/hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
-
- } else if (hit->hittype == TWO_THIRDS_SHORTEXON) {
- debug13(printf("Converting 2/3 shortexon\n"));
-
- if (hit->substring0 == NULL) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- return pairs;
-
- } else if (hit->substring2 == NULL) {
- pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- return pairs;
-
- } else {
- abort();
+ for (q = hit->junctions_1toN, p = List_next(p); p != NULL; q = List_next(q), p = List_next(p)) {
+ junction = (Junction_T) List_head(q);
+ substring = (Substring_T) List_head(p);
+
+ if ((type = Junction_type(junction)) == INS_JUNCTION) {
+ pairs = Substring_add_insertion(pairs,prev_substring,substring,hit->querylength,
+ /*insertionlength*/Junction_nindels(junction),queryseq,
+ hardclip_low,hardclip_high,queryseq_offset);
+ } else if (type == DEL_JUNCTION) {
+ deletion_string = Junction_deletion_string(junction,genome,hit->plusp);
+ pairs = Substring_add_deletion(pairs,prev_substring,substring,hit->querylength,
+ deletion_string,/*deletionlength*/Junction_nindels(junction),
+ hardclip_low,hardclip_high,queryseq_offset);
+ } else if (type == SPLICE_JUNCTION) {
+ pairs = Substring_add_intron(pairs,prev_substring,substring,hit->querylength,
+ hardclip_low,hardclip_high,queryseq_offset);
+
+ } else {
+ abort();
+ }
+
+ pairs = Substring_convert_to_pairs(pairs,substring,hit->querylength,
+ queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ prev_substring = substring;
}
- } else if (hit->hittype == SHORTEXON) {
- debug13(printf("Converting shortexon\n"));
- pairs = Substring_convert_to_pairs(pairs,hit->substring0,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring0,hit->substring1,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring1,queryseq,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_add_intron(pairs,hit->substring1,hit->substring2,hardclip_low,hardclip_high,queryseq_offset);
- pairs = Substring_convert_to_pairs(pairs,hit->substring2,queryseq,hardclip_low,hardclip_high,queryseq_offset);
+ debug15(Pair_dump_list(pairs,true));
return pairs;
-
- } else if (hit->hittype == GMAP) {
- debug13(printf("Converting gmap\n"));
- return Pair_convert_array_to_pairs(pairs,hit->pairarray,hit->npairs,hit->plusp,hit->querylength,
- clipdir,hardclip_low,hardclip_high,first_read_p,queryseq_offset);
-
- } else {
- abort();
- return NULL;
}
}
+/* Don't want querylength_adj */
struct Pair_T *
Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged, char **quality_merged,
Stage3pair_T this, Shortread_T queryseq5, Shortread_T queryseq3,
@@ -10799,10 +11422,10 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
if (hit5->plusp == true) {
if (clipdir > 0) {
- pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,/*first_read_p*/true,/*queryseq_offset*/0);
+ pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0);
pairs5 = strip_gaps_at_head(pairs5);
- pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,/*first_read_p*/false,
+ pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,
/*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
pairs3 = strip_gaps_at_tail(pairs3);
@@ -10837,10 +11460,10 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
}
} else if (clipdir < 0) {
- pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,/*first_read_p*/false,/*queryseq_offset*/0);
+ pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0);
pairs3 = strip_gaps_at_head(pairs3);
- pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,/*first_read_p*/true,
+ pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,
/*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
pairs5 = strip_gaps_at_tail(pairs5);
@@ -10848,9 +11471,11 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
genomicpos1 = ((Pair_T) List_head(pairs3))->genomepos;
genomicpos2 = ((Pair_T) List_last_value(pairs5))->genomepos;
if (genomicpos2 != genomicpos1 + 1U) {
- printf("Accession %s, plus\n",Shortread_accession(queryseq5));
+ printf("Accession %s, plus, clipdir %d\n",Shortread_accession(queryseq5),clipdir);
printf("Expected genomicpos2 %u == genomicpos1 %u + 1\n",genomicpos2,genomicpos1);
+ printf("Begin of pairs3\n");
Pair_dump_list(pairs3,true);
+ printf("Begin of pairs5\n");
Pair_dump_list(pairs5,true);
abort();
}
@@ -10880,11 +11505,11 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
} else {
if (clipdir > 0) {
- pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,/*first_read_p*/false,/*queryseq_offset*/0);
+ pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0);
pairs3 = strip_gaps_at_head(pairs3);
- pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,/*first_read_p*/true,
- /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
+ pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,
+ /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
pairs5 = strip_gaps_at_tail(pairs5);
#ifdef CHECK_ASSERTIONS
@@ -10918,10 +11543,10 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
}
} else if (clipdir < 0) {
- pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,clipdir,hardclip5_low,hardclip5_high,/*first_read_p*/true,/*queryseq_offset*/0);
+ pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0);
pairs5 = strip_gaps_at_head(pairs5);
- pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,clipdir,hardclip3_low,hardclip3_high,/*first_read_p*/false,
+ pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,
/*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
pairs3 = strip_gaps_at_tail(pairs3);
@@ -11010,135 +11635,171 @@ Stage3end_filter_bymatch (List_T hitlist) {
static Chrpos_T
overlap5_gmap_plus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend,
Stage3end_T hit5, Stage3end_T gmap) {
- Substring_T substring_high;
+ Chrpos_T chrpos;
+ Substring_T substring;
+ List_T p;
debug10(printf("Entered overlap5_gmap_plus with gmap %d..%d\n",
gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos));
- substring_high = (Substring_T) List_last_value(hit5->substring_LtoH);
- *genomicstart = Substring_alignstart_chr(substring_high);
- *genomicend = Substring_alignend_chr(substring_high);
- return Pair_binary_search_ascending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
- *genomicstart,*genomicend);
+ for (p = hit5->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_ambiguous_p(substring) == false) {
+ *genomicstart = Substring_alignstart_chr(substring);
+ *genomicend = Substring_alignend_chr(substring);
+ if ((chrpos = Pair_binary_search_ascending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
+ *genomicstart,*genomicend)) > 0) {
+ return chrpos;
+ }
+ }
+ }
+
+ return 0;
}
static Chrpos_T
overlap3_gmap_plus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend,
Stage3end_T hit3, Stage3end_T gmap) {
- Substring_T substring_low;
+ Chrpos_T chrpos;
+ Substring_T substring;
+ List_T p;
debug10(printf("Entered overlap3_gmap_plus with gmap %d..%d\n",
gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos));
- substring_low = (Substring_T) List_head(hit3->substring_LtoH);
- *genomicstart = Substring_alignstart_chr(substring_low);
- *genomicend = Substring_alignend_chr(substring_low);
- return Pair_binary_search_ascending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
- *genomicstart,*genomicend);
+ for (p = hit3->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_ambiguous_p(substring) == false) {
+ *genomicstart = Substring_alignstart_chr(substring);
+ *genomicend = Substring_alignend_chr(substring);
+ if ((chrpos = Pair_binary_search_ascending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
+ *genomicstart,*genomicend)) > 0) {
+ return chrpos;
+ }
+ }
+ }
+
+ return 0;
}
static Chrpos_T
overlap5_gmap_minus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend,
Stage3end_T hit5, Stage3end_T gmap) {
- Substring_T substring_low;
+ Chrpos_T chrpos;
+ Substring_T substring;
+ List_T p;
debug10(printf("Entered overlap5_gmap_minus with gmap %d..%d\n",
gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos));
- substring_low = (Substring_T) List_head(hit5->substring_LtoH);
- *genomicstart = Substring_alignstart_chr(substring_low);
- *genomicend = Substring_alignend_chr(substring_low);
- return Pair_binary_search_descending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
- *genomicstart,*genomicend);
+ for (p = hit5->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_ambiguous_p(substring) == false) {
+ *genomicstart = Substring_alignstart_chr(substring);
+ *genomicend = Substring_alignend_chr(substring);
+ if ((chrpos = Pair_binary_search_descending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
+ *genomicstart,*genomicend)) > 0) {
+ return chrpos;
+ }
+ }
+ }
+
+ return 0;
}
static Chrpos_T
overlap3_gmap_minus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend,
Stage3end_T hit3, Stage3end_T gmap) {
- Substring_T substring_high;
+ Chrpos_T chrpos;
+ Substring_T substring;
+ List_T p;
debug10(printf("Entered overlap3_gmap_minus with gmap %d..%d\n",
gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos));
- substring_high = (Substring_T) List_last_value(hit3->substring_LtoH);
- *genomicstart = Substring_alignstart_chr(substring_high);
- *genomicend = Substring_alignend_chr(substring_high);
- return Pair_binary_search_descending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
- *genomicstart,*genomicend);
+ for (p = hit3->substrings_LtoH; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ if (Substring_ambiguous_p(substring) == false) {
+ *genomicstart = Substring_alignstart_chr(substring);
+ *genomicend = Substring_alignend_chr(substring);
+ if ((chrpos = Pair_binary_search_descending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
+ *genomicstart,*genomicend)) > 0) {
+ return chrpos;
+ }
+ }
+ }
+
+ return 0;
}
+/* Should not set ambiguous flag in substrings, because resolution of
+ an ambiguity depends on a particular pair of ends */
+
static void
-resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, T *hit5, T *hit3, bool *private5p, bool *private3p,
- Univcoord_T *splicesites,
+resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, int *amb_resolve_5, int *amb_resolve_3,
+ int *amb_status_inside, T hit5, T hit3, Univcoord_T *splicesites,
Compress_T query5_compress_fwd, Compress_T query3_compress_fwd,
int localsplicing_penalty, int querylength5, int querylength3,
int genestrand) {
-#ifdef USE_BINGO
- Chrpos_T insertlength;
-#endif
+ int insertlength;
Univcoord_T genomicstart, genomicend;
- int nbingo, bingoi5, bingoi3, nbounded, boundedi5, boundedi3, nbest, besti5, besti3, i, j;
+ int nbingo, bingoi5, bingoi3;
+ int nbest, besti5, besti3, i, j;
int best_nmismatches, nmismatches;
- bool new5p = false, new3p = false;
- T old;
-
- Substring_T donor, acceptor, shortexon;
- Univcoord_T segment_left;
- int nmismatches_shortend;
- Univcoord_T donor_splicecoord, acceptor_splicecoord;
- int donor_knowni, acceptor_knowni;
- int splice_pos;
- int ignore_found_score = 0;
+ Substring_T substring5, substring3;
#ifdef LARGE_GENOMES
Uint8list_T ambcoords;
#else
Uintlist_T ambcoords;
#endif
- Intlist_T amb_knowni, amb_nmismatches;
- Doublelist_T amb_probs;
- double prob_shortend;
+ Univcoord_T *end_ambcoords, *start_ambcoords;
+ int *end_amb_nmismatches, *start_amb_nmismatches;
+ int end_amb_length_5, start_amb_length_3;
*unresolved_amb_length = 0;
- debug9(printf("resolve plus: hit5 %s ambiguous %d,%d and hit3 %s ambiguous %d,%d\n",
- hittype_string((*hit5)->hittype),(*hit5)->start_ambiguous_p,(*hit5)->end_ambiguous_p,
- hittype_string((*hit3)->hittype),(*hit3)->start_ambiguous_p,(*hit3)->end_ambiguous_p));
+ if (hit5->hittype == GMAP) {
+ substring5 = (Substring_T) NULL;
+ } else {
+ substring5 = (Substring_T) List_head(hit5->substrings_Nto1);
+ }
+ if (hit3->hittype == GMAP) {
+ substring3 = (Substring_T) NULL;
+ } else {
+ substring3 = (Substring_T) List_head(hit3->substrings_1toN);
+ }
+ debug9(printf("resolve plus: hit5 %s and hit3 %s\n",
+ hittype_string(hit5->hittype),hittype_string(hit3->hittype)));
- if ((*hit5)->end_ambiguous_p == true && (*hit3)->start_ambiguous_p == true) {
- debug9(printf("Got ambiguous at 5' and ambiguous at 3':"));
- nbest = nbounded = nbingo = 0;
+ if (substring5 != NULL && Substring_ambiguous_p(substring5) == true &&
+ substring3 != NULL && Substring_ambiguous_p(substring3) == true) {
+ debug9(printf("Resolve plus: Got ambiguous at 5' and ambiguous at 3':"));
+ end_ambcoords = Substring_ambcoords(substring5);
+ end_amb_nmismatches = Substring_amb_nmismatches(substring5);
+ start_ambcoords = Substring_ambcoords(substring3);
+ start_amb_nmismatches = Substring_amb_nmismatches(substring3);
+ end_amb_length_5 = end_amb_length(hit5);
+ start_amb_length_3 = start_amb_length(hit3);
+
+ nbingo = nbest = 0;
best_nmismatches = querylength5 + querylength3;
- for (i = 0; i < (*hit5)->end_nambcoords; i++) {
- genomicend = (*hit5)->end_ambcoords[i]; /* splicesites[] */
- for (j = 0; j < (*hit3)->start_nambcoords; j++) {
- genomicstart = (*hit3)->start_ambcoords[j]; /* splicesites[] */
- debug9(printf(" %u,%u",(Chrpos_T) (genomicend - (*hit5)->chroffset),(Chrpos_T) (genomicstart - (*hit3)->chroffset)));
+ for (i = 0; i < Substring_nambcoords(substring5); i++) {
+ genomicend = end_ambcoords[i] + end_amb_length_5;
+ for (j = 0; j < Substring_nambcoords(substring3); j++) {
+ genomicstart = start_ambcoords[j] - start_amb_length_3;
+ debug9(printf(" %u,%u",(Chrpos_T) (genomicend - hit5->chroffset),(Chrpos_T) (genomicstart - hit3->chroffset)));
if (genomicend < genomicstart) {
- nbounded++;
- boundedi5 = i;
- boundedi3 = j;
-
-#ifdef USE_BINGO
+ /* Look for valid insertlength */
insertlength = genomicstart - genomicend + querylength5 + querylength3;
debug9(printf(" (%u)",insertlength));
- if (insertlength < expected_pairlength) {
- if (expected_pairlength - insertlength <= pairlength_deviation) {
- nbingo++;
- bingoi5 = i;
- bingoi3 = j;
- debug9(printf("*"));
- }
- } else {
- if (insertlength - expected_pairlength <= pairlength_deviation) {
- nbingo++;
- bingoi5 = i;
- bingoi3 = j;
- debug9(printf("*"));
- }
+ if (insertlength >= expected_pairlength_low && insertlength <= expected_pairlength_high) {
+ nbingo++;
+ bingoi5 = i;
+ bingoi3 = j;
+ debug9(printf("*"));
}
-#endif
- if ((nmismatches = (*hit5)->end_amb_nmismatches[i] + (*hit3)->start_amb_nmismatches[j]) < best_nmismatches) {
+ if ((nmismatches = end_amb_nmismatches[i] + start_amb_nmismatches[j]) < best_nmismatches) {
best_nmismatches = nmismatches;
besti5 = i;
besti3 = j;
@@ -11150,60 +11811,54 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, T *hit5, T *hi
}
}
-#if 0
- /* No longer holds for GMAP */
- assert((*hit5)->end_amb_length > 0);
- assert((*hit3)->start_amb_length > 0);
-#endif
-
-#ifdef USE_BINGO
if (nbingo == 1) {
- new5p = true; new3p = true;
- } else if (nbounded == 1) {
- new5p = true; new3p = true; bingoi5 = boundedi5; bingoi3 = boundedi3;
- }
-#endif
+ *amb_resolve_5 = bingoi5;
+ *amb_resolve_3 = bingoi3;
+ *amb_status_inside = AMB_RESOLVED_BYLENGTH;
- if (nbest == 0) {
+ } else if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_length = %d...%d",
- (*hit5)->end_amb_length,(*hit3)->start_amb_length));
- *unresolved_amb_length = (*hit5)->end_amb_length + (*hit3)->start_amb_length;
+ end_amb_length(hit5),start_amb_length(hit3)));
+ *unresolved_amb_length = end_amb_length_5 + start_amb_length_3;
+ *amb_status_inside = AMB_UNRESOLVED_TOOCLOSE;
+
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
- new5p = true; new3p = true; bingoi5 = besti5; bingoi3 = besti3;
+ *amb_resolve_5 = besti5;
+ *amb_resolve_3 = besti3;
+ *amb_status_inside = AMB_RESOLVED_BYMATCHES;
+
+ } else {
+ *amb_resolve_5 = -1; /* Signifies cannot resolve */
+ *amb_resolve_3 = -1; /* Signifies cannot resolve */
+ *amb_status_inside = AMB_UNRESOLVED_MULTIPLE;
}
debug9(printf("\n"));
- } else if ((*hit5)->end_ambiguous_p == true) {
- debug9(printf("Got ambiguous at 5' (%s):",hittype_string((*hit5)->hittype)));
- nbest = nbounded = nbingo = 0;
- best_nmismatches = querylength5;
- for (i = 0; i < (*hit5)->end_nambcoords; i++) {
- genomicend = (*hit5)->end_ambcoords[i]; /* splicesites[] */
- debug9(printf(" %u",(Chrpos_T) (genomicend - (*hit5)->chroffset)));
- if (genomicend < (*hit3)->genomicstart /*allow overlap*/+ querylength3) {
- nbounded++;
- boundedi5 = i;
+ } else if (substring5 != NULL && Substring_ambiguous_p(substring5) == true) {
+ debug9(printf("hit3 %u..%u\n",hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset));
+ debug9(printf("Resolve plus: Got ambiguous at 5' (%s):",hittype_string(hit5->hittype)));
+ debug9(printf(" (?< %u):",hit3->genomicstart + querylength3 - hit3->chroffset));
+ end_ambcoords = Substring_ambcoords(substring5);
+ end_amb_nmismatches = Substring_amb_nmismatches(substring5);
+ end_amb_length_5 = end_amb_length(hit5);
-#ifdef USE_BINGO
- insertlength = (*hit3)->genomicstart - genomicend + querylength5 + querylength3;
+ nbingo = nbest = 0;
+ best_nmismatches = querylength5;
+ for (i = 0; i < Substring_nambcoords(substring5); i++) {
+ genomicend = end_ambcoords[i] + end_amb_length_5;
+ debug9(printf(" %u (%d mismatches)",(Chrpos_T) (genomicend - hit5->chroffset),end_amb_nmismatches[i]));
+ if (genomicend < hit3->genomicstart /*allow overlap*/+ querylength3) {
+ /* Look for valid insertlength */
+ insertlength = hit3->genomicstart - genomicend + querylength5 + querylength3;
debug9(printf(" (%u)",insertlength));
- if (insertlength < expected_pairlength) {
- if (expected_pairlength - insertlength <= pairlength_deviation) {
- nbingo++;
- bingoi5 = i;
- debug9(printf("*"));
- }
- } else {
- if (insertlength - expected_pairlength <= pairlength_deviation) {
- nbingo++;
- bingoi5 = i;
- debug9(printf("*"));
- }
+ if (insertlength >= expected_pairlength_low && insertlength <= expected_pairlength_high) {
+ nbingo++;
+ bingoi5 = i;
+ debug9(printf("*"));
}
-#endif
- if ((nmismatches = (*hit5)->end_amb_nmismatches[i]) < best_nmismatches) {
+ if ((nmismatches = end_amb_nmismatches[i]) < best_nmismatches) {
best_nmismatches = nmismatches;
besti5 = i;
nbest = 1;
@@ -11213,60 +11868,50 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, T *hit5, T *hi
}
}
-#if 0
- /* No longer holds for GMAP */
- assert((*hit5)->end_amb_length > 0);
- assert((*hit3)->start_amb_length == 0);
-#endif
-
-#ifdef USE_BINGO
if (nbingo == 1) {
- new5p = true;
- } else if (nbounded == 1) {
- new5p = true; bingoi5 = boundedi5;
- }
-#endif
+ debug9(printf("\nnbingo is 1\n"));
+ *amb_resolve_5 = bingoi5;
+ *amb_status_inside = AMB_RESOLVED_BYLENGTH;
- if (nbest == 0) {
+ } else if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_length = %d...%d",
- (*hit5)->end_amb_length,(*hit3)->start_amb_length));
- *unresolved_amb_length = (*hit5)->end_amb_length;
+ end_amb_length(hit5),start_amb_length(hit3)));
+ *unresolved_amb_length = end_amb_length_5;
+ *amb_status_inside = AMB_UNRESOLVED_TOOCLOSE;
+
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
- new5p = true; bingoi5 = besti5;
+ *amb_resolve_5 = besti5;
+ *amb_status_inside = AMB_RESOLVED_BYMATCHES;
+
+ } else {
+ *amb_resolve_5 = -1;
+ *amb_status_inside = AMB_UNRESOLVED_MULTIPLE;
}
debug9(printf("\n"));
- } else if ((*hit3)->start_ambiguous_p == true) {
- debug9(printf("Got ambiguous at 3':"));
- nbest = nbounded = nbingo = 0;
+ } else if (substring3 != NULL && Substring_ambiguous_p(substring3) == true) {
+ debug9(printf("Resolve plus: Got ambiguous at 3':"));
+ start_ambcoords = Substring_ambcoords(substring3);
+ start_amb_nmismatches = Substring_amb_nmismatches(substring3);
+ start_amb_length_3 = start_amb_length(hit3);
+
+ nbingo = nbest = 0;
best_nmismatches = querylength3;
- for (j = 0; j < (*hit3)->start_nambcoords; j++) {
- genomicstart = (*hit3)->start_ambcoords[j]; /* splicesites[] */
- debug9(printf(" %u",(Chrpos_T) (genomicstart - (*hit3)->chroffset)));
- if ((*hit5)->genomicend < genomicstart /*allow overlap*/+ querylength5) {
- nbounded++;
- boundedi3 = j;
-
-#ifdef USE_BINGO
- insertlength = genomicstart - (*hit5)->genomicend + querylength5 + querylength3;
+ for (j = 0; j < Substring_nambcoords(substring3); j++) {
+ genomicstart = start_ambcoords[j] - start_amb_length_3;
+ debug9(printf(" %u",(Chrpos_T) (genomicstart - hit3->chroffset)));
+ if (hit5->genomicend < genomicstart /*allow overlap*/+ querylength5) {
+ /* Look for valid insertlength */
+ insertlength = genomicstart - hit5->genomicend + querylength5 + querylength3;
debug9(printf(" (%u)",insertlength));
- if (insertlength < expected_pairlength) {
- if (expected_pairlength - insertlength <= pairlength_deviation) {
- nbingo++;
- bingoi3 = j;
- debug9(printf("*"));
- }
- } else {
- if (insertlength - expected_pairlength <= pairlength_deviation) {
- nbingo++;
- bingoi3 = j;
- debug9(printf("*"));
- }
+ if (insertlength >= expected_pairlength_low && insertlength <= expected_pairlength_high) {
+ nbingo++;
+ bingoi3 = j;
+ debug9(printf("*"));
}
-#endif
- if ((nmismatches = (*hit3)->start_amb_nmismatches[j]) < best_nmismatches) {
+ if ((nmismatches = start_amb_nmismatches[j]) < best_nmismatches) {
best_nmismatches = nmismatches;
besti3 = j;
nbest = 1;
@@ -11276,518 +11921,103 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, T *hit5, T *hi
}
}
-#if 0
- /* No longer holds for GMAP */
- assert((*hit5)->end_amb_length == 0);
- assert((*hit3)->start_amb_length > 0);
-#endif
-
-#ifdef USE_BINGO
if (nbingo == 1) {
- new3p = true;
- } else if (nbounded == 1) {
- new3p = true; bingoi3 = boundedi3;
- }
-#endif
+ debug9(printf("\nnbingo is 1\n"));
+ *amb_resolve_3 = bingoi3;
+ *amb_status_inside = AMB_RESOLVED_BYLENGTH;
- if (nbest == 0) {
+ } else if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_length = %d...%d",
- (*hit5)->end_amb_length,(*hit3)->start_amb_length));
- *unresolved_amb_length = (*hit3)->start_amb_length;
+ end_amb_length(hit5),start_amb_length(hit3)));
+ *unresolved_amb_length = start_amb_length_3;
+ *amb_status_inside = AMB_UNRESOLVED_TOOCLOSE;
+
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
- new3p = true; bingoi3 = besti3;
- }
- debug9(printf("\n"));
- }
-
- if (new5p == false) {
- /* Skip */
- } else if ((*hit5)->hittype == ONE_THIRD_SHORTEXON || (*hit5)->hittype == TWO_THIRDS_SHORTEXON) {
-
- if ((*hit5)->sensedir == SENSE_FORWARD) {
- /* End 1 */
- shortexon = (*hit5)->substring1;
-
- donor_splicecoord = Substring_splicecoord_D(shortexon);
- /* donor_knowni = Substring_splicesites_knowni_D(shortexon); */
- splice_pos = Substring_chimera_pos_D(shortexon);
- acceptor_splicecoord = (*hit5)->ambcoords_acceptor[bingoi5];
- acceptor_knowni = (*hit5)->amb_knowni_acceptor[bingoi5];
- nmismatches_shortend = (*hit5)->amb_nmismatches_acceptor[bingoi5];
- prob_shortend = (*hit5)->amb_probs_acceptor[bingoi5];
- segment_left = acceptor_splicecoord - splice_pos;
-
- if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,/*left*/segment_left,query5_compress_fwd,
- querylength5,/*plusp*/true,genestrand,/*first_read_p*/true,/*sensep*/true,
- Substring_chrnum(shortexon),Substring_chroffset(shortexon),
- Substring_chrhigh(shortexon),Substring_chrlength(shortexon))) != NULL) {
- debug9(printf("Resolved shortexon, End 1: Splice from donor %u to acceptor %u, with nmismatches %d\n",
- donor_splicecoord - Substring_chroffset(shortexon),
- acceptor_splicecoord - Substring_chroffset(shortexon),nmismatches_shortend));
- old = *hit5;
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_from_array(old->ambcoords_donor,old->nambcoords_donor);
-#else
- ambcoords = Uintlist_from_array(old->ambcoords_donor,old->nambcoords_donor);
-#endif
- amb_knowni = Intlist_from_array(old->amb_knowni_donor,old->nambcoords_donor);
- amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
- amb_probs = Doublelist_from_array(old->amb_probs_donor,old->nambcoords_donor);
-
- *hit5 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
- old->amb_length_donor,/*amb_length_acceptor*/0,
- /*amb_prob_donor*/Doublelist_max(amb_probs),/*amb_prob_acceptor*/0.0,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- localsplicing_penalty,querylength5,/*sensedir*/SENSE_FORWARD,
- /*sarrayp*/false);
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
-#else
- Uintlist_free(&ambcoords);
-#endif
-
- if (*private5p == true) {
- Stage3end_free(&old);
- }
- *private5p = true;
- }
-
- } else if ((*hit5)->sensedir == SENSE_ANTI) {
- /* End 6 */
- shortexon = (*hit5)->substring1;
-
- acceptor_splicecoord = Substring_splicecoord_A(shortexon);
- /* acceptor_knowni = Substring_splicesites_knowni_A(shortexon); */
- splice_pos = Substring_chimera_pos_A(shortexon);
- donor_splicecoord = (*hit5)->ambcoords_donor[bingoi5];
- donor_knowni = (*hit5)->amb_knowni_donor[bingoi5];
- nmismatches_shortend = (*hit5)->amb_nmismatches_donor[bingoi5];
- prob_shortend = (*hit5)->amb_probs_donor[bingoi5];
- segment_left = donor_splicecoord - splice_pos;
-
- if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,/*left*/segment_left,query5_compress_fwd,
- querylength5,/*plusp*/true,genestrand,/*first_read_p*/true,/*sensep*/false,
- Substring_chrnum(shortexon),Substring_chroffset(shortexon),
- Substring_chrhigh(shortexon),Substring_chrlength(shortexon))) != NULL) {
- debug9(printf("Resolved shortexon, End 6: Splice from antiacceptor %u to antidonor %u, with nmismatches %d\n",
- acceptor_splicecoord - Substring_chroffset(shortexon),
- donor_splicecoord - Substring_chroffset(shortexon),nmismatches_shortend));
- old = *hit5;
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
-#else
- ambcoords = Uintlist_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
-#endif
- amb_knowni = Intlist_from_array(old->amb_knowni_acceptor,old->nambcoords_acceptor);
- amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
- amb_probs = Doublelist_from_array(old->amb_probs_acceptor,old->nambcoords_acceptor);
-
- *hit5 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
- /*amb_length_donor*/0,old->amb_length_acceptor,
- /*amb_prob_donor*/0.0,/*amb_prob_acceptor*/Doublelist_max(amb_probs),
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- localsplicing_penalty,querylength5,/*sensedir*/SENSE_ANTI,
- /*sarrayp*/false);
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
-#else
- Uintlist_free(&ambcoords);
-#endif
-
- if (*private5p == true) {
- Stage3end_free(&old);
- }
- *private5p = true;
- }
+ *amb_resolve_3 = besti3;
+ *amb_status_inside = AMB_RESOLVED_BYMATCHES;
} else {
- fprintf(stderr,"Shortexon hit5 has no sensedir\n");
- abort();
- }
-
-
- } else if ((*hit5)->hittype == HALFSPLICE_DONOR) {
- /* End 1 */
- assert((*hit5)->sensedir == SENSE_FORWARD);
- donor = (*hit5)->substring_donor;
-
- donor_splicecoord = Substring_splicecoord(donor);
- /* donor_knowni = Substring_splicesites_knowni(donor); */
- splice_pos = Substring_chimera_pos(donor);
- acceptor_splicecoord = (*hit5)->ambcoords_acceptor[bingoi5];
- acceptor_knowni = (*hit5)->amb_knowni_acceptor[bingoi5];
- nmismatches_shortend = (*hit5)->amb_nmismatches_acceptor[bingoi5];
- prob_shortend = (*hit5)->amb_probs_acceptor[bingoi5];
- segment_left = acceptor_splicecoord - splice_pos;
-
- if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,/*left*/segment_left,query5_compress_fwd,
- querylength5,/*plusp*/true,genestrand,/*first_read_p*/true,/*sensep*/true,
- Substring_chrnum(donor),Substring_chroffset(donor),
- Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
- debug9(printf("Resolved halfsplice_donor, End 1: Splice from donor #%d to acceptor #%d, with nmismatches %d\n",
- Substring_splicecoord(donor) - Substring_chroffset(donor),
- Substring_splicecoord(acceptor) - Substring_chroffset(acceptor),nmismatches_shortend));
- old = *hit5;
- *hit5 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
- donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_length*/0,/*amb_prob*/0.0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/true,/*sensedir*/SENSE_FORWARD,
- /*sarrayp*/false);
- if (*private5p == true) {
- Stage3end_free(&old);
- }
- *private5p = true;
+ *amb_resolve_3 = -1;
+ *amb_status_inside = AMB_UNRESOLVED_MULTIPLE;
}
-
- } else if ((*hit5)->hittype == HALFSPLICE_ACCEPTOR) {
- /* End 6 */
- assert((*hit5)->sensedir == SENSE_ANTI);
- acceptor = (*hit5)->substring_acceptor;
-
- acceptor_splicecoord = Substring_splicecoord(acceptor);
- /* acceptor_knowni = Substring_splicesites_knowni(acceptor); */
- splice_pos = Substring_chimera_pos(acceptor);
- donor_splicecoord = (*hit5)->ambcoords_donor[bingoi5];
- donor_knowni = (*hit5)->amb_knowni_donor[bingoi5];
- nmismatches_shortend = (*hit5)->amb_nmismatches_donor[bingoi5];
- prob_shortend = (*hit5)->amb_probs_donor[bingoi5];
- segment_left = donor_splicecoord - splice_pos;
-
- if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,/*left*/segment_left,query5_compress_fwd,
- querylength5,/*plusp*/true,genestrand,/*first_read_p*/true,/*sensep*/false,
- Substring_chrnum(acceptor),Substring_chroffset(acceptor),
- Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
- debug9(printf("Resolved halfsplice_acceptor, End 6: Splice from antiacceptor #%d to antidonor #%d, with nmismatches %d\n",
- Substring_splicecoord(acceptor) - Substring_chroffset(acceptor),
- Substring_splicecoord(donor) - Substring_chroffset(donor),nmismatches_shortend));
- old = *hit5;
- *hit5 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
- donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_length*/0,/*amb_prob*/0.0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/true,/*sensedir*/SENSE_ANTI,
- /*sarrayp*/false);
- if (*private5p == true) {
- Stage3end_free(&old);
- }
- *private5p = true;
- }
-
- } else {
- fprintf(stderr,"Unexpected hittype %d for ambiguous end\n",(*hit5)->hittype);
- abort();
+
+ debug9(printf("\n"));
}
- if (new3p == false) {
- /* Skip */
-
- } else if ((*hit3)->hittype == ONE_THIRD_SHORTEXON || (*hit3)->hittype == TWO_THIRDS_SHORTEXON) {
-
- if ((*hit3)->sensedir == SENSE_ANTI) {
- /* End 5 */
- shortexon = (*hit3)->substring1;
-
- donor_splicecoord = Substring_splicecoord_D(shortexon);
- /* donor_knowni = Substring_splicesites_knowni_D(shortexon); */
- splice_pos = Substring_chimera_pos_D(shortexon);
- acceptor_splicecoord = (*hit3)->ambcoords_acceptor[bingoi3];
- acceptor_knowni = (*hit3)->amb_knowni_acceptor[bingoi3];
- nmismatches_shortend = (*hit3)->amb_nmismatches_acceptor[bingoi3];
- prob_shortend = (*hit3)->amb_probs_acceptor[bingoi3];
- segment_left = acceptor_splicecoord - splice_pos;
-
- if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query3_compress_fwd,
- querylength3,/*plusp*/true,genestrand,/*first_read_p*/false,/*sensep*/false,
- Substring_chrnum(shortexon),Substring_chroffset(shortexon),
- Substring_chrhigh(shortexon),Substring_chrlength(shortexon))) != NULL) {
- debug9(printf("Resolved shortexonr, End 5: Splice from antidonor #%d to antiacceptor #%d, with nmismatches %d\n",
- donor_splicecoord - Substring_chroffset(shortexon),
- acceptor_splicecoord - Substring_chroffset(shortexon),nmismatches_shortend));
- old = *hit3;
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_from_array(old->ambcoords_donor,old->nambcoords_donor);
-#else
- ambcoords = Uintlist_from_array(old->ambcoords_donor,old->nambcoords_donor);
-#endif
- amb_knowni = Intlist_from_array(old->amb_knowni_donor,old->nambcoords_donor);
- amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
- amb_probs = Doublelist_from_array(old->amb_probs_donor,old->nambcoords_donor);
-
- *hit3 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
- old->amb_length_donor,/*amb_length_acceptor*/0,
- /*amb_prob_donor*/Doublelist_max(amb_probs),/*amb_prob_acceptor*/0.0,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- localsplicing_penalty,querylength3,/*sensedir*/SENSE_ANTI,
- /*sarrayp*/false);
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
-#else
- Uintlist_free(&ambcoords);
-#endif
-
- if (*private3p == true) {
- Stage3end_free(&old);
- }
- *private3p = true;
- }
-
- } else if ((*hit3)->sensedir == SENSE_FORWARD) {
- /* End 2 */
- shortexon = (*hit3)->substring1;
-
- acceptor_splicecoord = Substring_splicecoord_A(shortexon);
- /* acceptor_knowni = Substring_splicesites_knowni_A(shortexon); */
- splice_pos = Substring_chimera_pos_A(shortexon);
- donor_splicecoord = (*hit3)->ambcoords_donor[bingoi3];
- donor_knowni = (*hit3)->amb_knowni_donor[bingoi3];
- nmismatches_shortend = (*hit3)->amb_nmismatches_donor[bingoi3];
- prob_shortend = (*hit3)->amb_probs_donor[bingoi3];
- segment_left = donor_splicecoord - splice_pos;
-
- if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query3_compress_fwd,
- querylength3,/*plusp*/true,genestrand,/*first_read_p*/false,/*sensep*/true,
- Substring_chrnum(shortexon),Substring_chroffset(shortexon),
- Substring_chrhigh(shortexon),Substring_chrlength(shortexon))) != NULL) {
- debug9(printf("Resolved shortexon, End 2: Splice from acceptor #%d to donor #%d, with nmismatches %d\n",
- acceptor_splicecoord - Substring_chroffset(shortexon),
- donor_splicecoord - Substring_chroffset(shortexon),nmismatches_shortend));
- old = *hit3;
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
-#else
- ambcoords = Uintlist_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
-#endif
- amb_knowni = Intlist_from_array(old->amb_knowni_acceptor,old->nambcoords_acceptor);
- amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
- amb_probs = Doublelist_from_array(old->amb_probs_acceptor,old->nambcoords_acceptor);
-
- *hit3 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
- /*amb_length_donor*/0,old->amb_length_acceptor,
- /*amb_prob_donor*/0.0,/*amb_prob_acceptor*/Doublelist_max(amb_probs),
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- localsplicing_penalty,querylength3,/*sensedir*/SENSE_FORWARD,
- /*sarrayp*/false);
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
-#else
- Uintlist_free(&ambcoords);
-#endif
-
- if (*private3p == true) {
- Stage3end_free(&old);
- }
- *private3p = true;
- }
-
- } else {
- fprintf(stderr,"Shortexon hit5 has no sensedir\n");
- abort();
- }
-
-
- } else if ((*hit3)->hittype == HALFSPLICE_DONOR) {
- /* End 5 */
- assert((*hit3)->sensedir == SENSE_ANTI);
- donor = (*hit3)->substring_donor;
-
- donor_splicecoord = Substring_splicecoord(donor);
- /* donor_knowni = Substring_splicesites_knowni(donor); */
- splice_pos = Substring_chimera_pos(donor);
- acceptor_splicecoord = (*hit3)->ambcoords_acceptor[bingoi3];
- acceptor_knowni = (*hit3)->amb_knowni_acceptor[bingoi3];
- nmismatches_shortend = (*hit3)->amb_nmismatches_acceptor[bingoi3];
- prob_shortend = (*hit3)->amb_probs_acceptor[bingoi3];
- segment_left = acceptor_splicecoord - splice_pos;
-
- if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query3_compress_fwd,
- querylength3,/*plusp*/true,genestrand,/*first_read_p*/false,/*sensep*/false,
- Substring_chrnum(donor),Substring_chroffset(donor),
- Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
- debug9(printf("Resolved halfsplice donor, End 5: Splice from antidonor #%d to antiacceptor #%d, with nmismatches %d\n",
- Substring_splicecoord(donor) - Substring_chroffset(donor),
- Substring_splicecoord(acceptor) - Substring_chroffset(acceptor),nmismatches_shortend));
- old = *hit3;
- *hit3 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
- donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_length*/0,/*amb_prob*/0.0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/false,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false);
- if (*private3p == true) {
- Stage3end_free(&old);
- }
- *private3p = true;
- }
-
- } else if ((*hit3)->hittype == HALFSPLICE_ACCEPTOR) {
- /* End 2 */
- assert((*hit3)->sensedir == SENSE_FORWARD);
- acceptor = (*hit3)->substring_acceptor;
-
- acceptor_splicecoord = Substring_splicecoord(acceptor);
- /* acceptor_knowni = Substring_splicesites_knowni(acceptor); */
- splice_pos = Substring_chimera_pos(acceptor);
- donor_splicecoord = (*hit3)->ambcoords_donor[bingoi3];
- donor_knowni = (*hit3)->amb_knowni_donor[bingoi3];
- nmismatches_shortend = (*hit3)->amb_nmismatches_donor[bingoi3];
- prob_shortend = (*hit3)->amb_probs_donor[bingoi3];
- segment_left = donor_splicecoord - splice_pos;
-
- if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query3_compress_fwd,
- querylength3,/*plusp*/true,genestrand,/*first_read_p*/false,/*sensep*/true,
- Substring_chrnum(acceptor),Substring_chroffset(acceptor),
- Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
- debug9(printf("Resolved halfsplice acceptor, End 2: Splice from acceptor %u (%u) to donor %u (%u), with nmismatches %d\n",
- (Chrpos_T) (Substring_splicecoord(acceptor) - Substring_chroffset(acceptor)),
- (Chrpos_T) (acceptor_splicecoord - Substring_chroffset(acceptor)),
- (Chrpos_T) (Substring_splicecoord(donor) - Substring_chroffset(donor)),
- (Chrpos_T) (donor_splicecoord - Substring_chroffset(donor)),nmismatches_shortend));
- old = *hit3;
- *hit3 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
- donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_length*/0,/*amb_prob*/0.0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/false,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false);
- if (*private3p == true) {
- Stage3end_free(&old);
- }
- *private3p = true;
- }
-
- } else {
- fprintf(stderr,"Unexpected hittype %d for ambiguous end\n",(*hit3)->hittype);
- abort();
- }
-
return;
}
static void
-resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, T *hit5, T *hit3, bool *private5p, bool *private3p,
- Univcoord_T *splicesites,
+resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, int *amb_resolve_5, int *amb_resolve_3,
+ int *amb_status_inside, T hit5, T hit3, Univcoord_T *splicesites,
Compress_T query5_compress_rev, Compress_T query3_compress_rev,
int localsplicing_penalty, int querylength5, int querylength3,
int genestrand) {
-#ifdef USE_BINGO
int insertlength;
-#endif
Univcoord_T genomicstart, genomicend;
- int nbingo, bingoi5, bingoi3, nbounded, boundedi5, boundedi3, nbest, besti5, besti3, i, j;
+ int nbingo, bingoi5, bingoi3;
+ int nbest, besti5, besti3, i, j;
int best_nmismatches, nmismatches;
bool new5p = false, new3p = false;
- T old;
-
- Substring_T donor, acceptor, shortexon;
- Univcoord_T segment_left;
- int nmismatches_shortend;
- Univcoord_T donor_splicecoord, acceptor_splicecoord;
- int donor_knowni, acceptor_knowni;
- int splice_pos;
- int ignore_found_score = 0;
+ Substring_T substring5, substring3;
#ifdef LARGE_GENOMES
Uint8list_T ambcoords;
#else
Uintlist_T ambcoords;
#endif
- Intlist_T amb_knowni, amb_nmismatches;
- Doublelist_T amb_probs;
- double prob_shortend;
-
+ Univcoord_T *end_ambcoords, *start_ambcoords;
+ int *end_amb_nmismatches, *start_amb_nmismatches;
+ int end_amb_length_5, start_amb_length_3;
*unresolved_amb_length = 0;
- debug9(printf("resolve minus: hit5 %s ambiguous %d,%d and hit3 %s ambiguous %d,%d\n",
- hittype_string((*hit5)->hittype),(*hit5)->start_ambiguous_p,(*hit5)->end_ambiguous_p,
- hittype_string((*hit3)->hittype),(*hit3)->start_ambiguous_p,(*hit3)->end_ambiguous_p));
+ debug9(printf("resolve minus: hit5 %s and hit3 %s\n",
+ hittype_string(hit5->hittype),hittype_string(hit3->hittype)));
+ if (hit5->hittype == GMAP) {
+ substring5 = (Substring_T) NULL;
+ } else {
+ substring5 = (Substring_T) List_head(hit5->substrings_Nto1);
+ debug9(printf("hit5 ambiguous_p %d\n",Substring_ambiguous_p(substring5)));
+ }
+ if (hit3->hittype == GMAP) {
+ substring3 = (Substring_T) NULL;
+ } else {
+ substring3 = (Substring_T) List_head(hit3->substrings_1toN);
+ debug9(printf("hit3 ambiguous_p %d\n",Substring_ambiguous_p(substring3)));
+ }
+
+ if (substring5 != NULL && Substring_ambiguous_p(substring5) == true &&
+ substring3 != NULL && Substring_ambiguous_p(substring3) == true) {
+ debug9(printf("Resolve minus: Got ambiguous at 5' and ambiguous at 3':"));
+ end_ambcoords = Substring_ambcoords(substring5);
+ end_amb_nmismatches = Substring_amb_nmismatches(substring5);
+ start_ambcoords = Substring_ambcoords(substring3);
+ start_amb_nmismatches = Substring_amb_nmismatches(substring3);
+ end_amb_length_5 = end_amb_length(hit5);
+ start_amb_length_3 = start_amb_length(hit3);
- if ((*hit5)->end_ambiguous_p == true && (*hit3)->start_ambiguous_p == true) {
- debug9(printf("Got ambiguous at 5' and ambiguous at 3':"));
- nbest = nbounded = nbingo = 0;
+ nbingo = nbest = 0;
best_nmismatches = querylength5 + querylength3;
- for (i = 0; i < (*hit5)->end_nambcoords; i++) {
- genomicend = (*hit5)->end_ambcoords[i]; /* splicesites[] */
- for (j = 0; j < (*hit3)->start_nambcoords; j++) {
- genomicstart = (*hit3)->start_ambcoords[j]; /* splicesites[] */
- debug9(printf(" %l,%u",(Chrpos_T) (genomicend - (*hit5)->chroffset),(Chrpos_T) (genomicstart - (*hit3)->chroffset)));
+ for (i = 0; i < Substring_nambcoords(substring5); i++) {
+ genomicend = end_ambcoords[i] - end_amb_length_5;
+ for (j = 0; j < Substring_nambcoords(substring3); j++) {
+ genomicstart = start_ambcoords[j] + start_amb_length_3;
+ debug9(printf(" %u,%u",(Chrpos_T) (genomicend - hit5->chroffset),(Chrpos_T) (genomicstart - hit3->chroffset)));
if (genomicstart < genomicend) {
- nbounded++;
- boundedi5 = i;
- boundedi3 = j;
-
-#ifdef USE_BINGO
+ /* Look for valid insertlength */
insertlength = genomicend - genomicstart + querylength5 + querylength3;
debug9(printf(" (%u)",insertlength));
- if (insertlength < expected_pairlength) {
- if (expected_pairlength - insertlength <= pairlength_deviation) {
- nbingo++;
- bingoi5 = i;
- bingoi3 = j;
- debug9(printf("*"));
- }
- } else {
- if (insertlength - expected_pairlength <= pairlength_deviation) {
- nbingo++;
- bingoi5 = i;
- bingoi3 = j;
- debug9(printf("*"));
- }
+ if (insertlength >= expected_pairlength_low && insertlength <= expected_pairlength_high) {
+ nbingo++;
+ bingoi5 = i;
+ bingoi3 = j;
+ debug9(printf("*"));
}
-#endif
- if ((nmismatches = (*hit5)->end_amb_nmismatches[i] + (*hit3)->start_amb_nmismatches[j]) < best_nmismatches) {
+ if ((nmismatches = end_amb_nmismatches[i] + start_amb_nmismatches[j]) < best_nmismatches) {
best_nmismatches = nmismatches;
besti5 = i;
besti3 = j;
@@ -11799,61 +12029,54 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, T *hit5, T *h
}
}
-#if 0
- /* No longer holds for GMAP */
- assert((*hit5)->end_amb_length > 0);
- assert((*hit3)->start_amb_length > 0);
-#endif
-
-#ifdef USE_BINGO
if (nbingo == 1) {
- new5p = true; new3p = true;
- } else if (nbounded == 1) {
- new5p = true; new3p = true; bingoi5 = boundedi5; bingoi3 = boundedi3;
- }
-#endif
+ debug9(printf("\nnbingo is 1\n"));
+ *amb_resolve_5 = bingoi5;
+ *amb_resolve_3 = bingoi3;
+ *amb_status_inside = AMB_RESOLVED_BYLENGTH;
- if (nbest == 0) {
+ } else if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_length = %d...%d",
- (*hit5)->end_amb_length,(*hit3)->start_amb_length));
- *unresolved_amb_length = (*hit5)->end_amb_length + (*hit3)->start_amb_length;
+ end_amb_length(hit5),start_amb_length(hit3)));
+ *unresolved_amb_length = end_amb_length_5 + start_amb_length_3;
+ *amb_status_inside = AMB_UNRESOLVED_TOOCLOSE;
+
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
- new5p = true; new3p = true; bingoi5 = besti5; bingoi3 = besti3;
+ *amb_resolve_5 = besti5;
+ *amb_resolve_3 = besti3;
+ *amb_status_inside = AMB_RESOLVED_BYMATCHES;
+
+ } else {
+ *amb_resolve_5 = -1; /* Signifies cannot resolve */
+ *amb_resolve_3 = -1;
+ *amb_status_inside = AMB_UNRESOLVED_MULTIPLE;
+
}
debug9(printf("\n"));
- } else if ((*hit5)->end_ambiguous_p == true) {
- debug9(printf("Got ambiguous at 5':"));
- nbest = nbounded = nbingo = 0;
- best_nmismatches = querylength5;
- for (i = 0; i < (*hit5)->end_nambcoords; i++) {
- genomicend = (*hit5)->end_ambcoords[i]; /* splicesites[] */
- debug9(printf(" %u",(Chrpos_T) (genomicend - (*hit5)->chroffset)));
- if ((*hit3)->genomicstart < genomicend /*allow overlap*/+ querylength3) {
- nbounded++;
- boundedi5 = i;
- boundedi3 = j;
+ } else if (substring5 != NULL && Substring_ambiguous_p(substring5) == true) {
+ debug9(printf("Resolve minus: Got ambiguous at 5':"));
+ end_ambcoords = Substring_ambcoords(substring5);
+ end_amb_nmismatches = Substring_amb_nmismatches(substring5);
+ end_amb_length_5 = end_amb_length(hit5);
-#ifdef USE_BINGO
- insertlength = genomicend - (*hit3)->genomicstart + querylength5 + querylength3;
+ nbingo = nbest = 0;
+ best_nmismatches = querylength5;
+ for (i = 0; i < Substring_nambcoords(substring5); i++) {
+ genomicend = end_ambcoords[i] - end_amb_length_5;
+ debug9(printf(" %u",(Chrpos_T) (genomicend - hit5->chroffset)));
+ if (hit3->genomicstart < genomicend /*allow overlap*/+ querylength3) {
+ /* Look for valid insertlength */
+ insertlength = genomicend - hit3->genomicstart + querylength5 + querylength3;
debug9(printf(" (%u)",insertlength));
- if (insertlength < expected_pairlength) {
- if (expected_pairlength - insertlength <= pairlength_deviation) {
- nbingo++;
- bingoi5 = i;
- debug9(printf("*"));
- }
- } else {
- if (insertlength - expected_pairlength <= pairlength_deviation) {
- nbingo++;
- bingoi5 = i;
- debug9(printf("*"));
- }
+ if (insertlength >= expected_pairlength_low && insertlength <= expected_pairlength_high) {
+ nbingo++;
+ bingoi5 = i;
+ debug9(printf("*"));
}
-#endif
- if ((nmismatches = (*hit5)->end_amb_nmismatches[i]) < best_nmismatches) {
+ if ((nmismatches = end_amb_nmismatches[i]) < best_nmismatches) {
best_nmismatches = nmismatches;
besti5 = i;
nbest = 1;
@@ -11863,60 +12086,50 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, T *hit5, T *h
}
}
-#if 0
- /* No longer holds for GMAP */
- assert((*hit5)->end_amb_length > 0);
- assert((*hit3)->start_amb_length == 0);
-#endif
-
-#ifdef USE_BINGO
if (nbingo == 1) {
- new5p = true;
- } else if (nbounded == 1) {
- new5p = true; bingoi5 = boundedi5;
- }
-#endif
+ debug9(printf("\nnbingo is 1\n"));
+ *amb_resolve_5 = bingoi5;
+ *amb_status_inside = AMB_RESOLVED_BYLENGTH;
- if (nbest == 0) {
+ } else if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_length = %d...%d",
- (*hit5)->end_amb_length,(*hit3)->start_amb_length));
- *unresolved_amb_length = (*hit5)->end_amb_length;
+ end_amb_length(hit5),start_amb_length(hit3)));
+ *unresolved_amb_length = end_amb_length_5;
+ *amb_status_inside = AMB_UNRESOLVED_TOOCLOSE;
+
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
- new5p = true; bingoi5 = besti5;
+ *amb_resolve_5 = besti5;
+ *amb_status_inside = AMB_RESOLVED_BYMATCHES;
+
+ } else {
+ *amb_resolve_5 = -1;
+ *amb_status_inside = AMB_UNRESOLVED_MULTIPLE;
}
debug9(printf("\n"));
- } else if ((*hit3)->start_ambiguous_p == true) {
- debug9(printf("Got ambiguous at 3':"));
- nbest = nbounded = nbingo = 0;
- best_nmismatches = querylength3;
- for (j = 0; j < (*hit3)->start_nambcoords; j++) {
- genomicstart = (*hit3)->start_ambcoords[j]; /* splicesites[] */
- debug9(printf(" %u",(Chrpos_T) (genomicstart - (*hit3)->chroffset)));
- if (genomicstart < (*hit5)->genomicend /*allow overlap*/+ querylength5) {
- nbounded++;
- boundedi3 = j;
+ } else if (substring3 != NULL && Substring_ambiguous_p(substring3) == true) {
+ debug9(printf("Resolve minus: Got ambiguous at 3':"));
+ start_ambcoords = Substring_ambcoords(substring3);
+ start_amb_nmismatches = Substring_amb_nmismatches(substring3);
+ start_amb_length_3 = start_amb_length(hit3);
-#ifdef USE_BINGO
- insertlength = (*hit5)->genomicend - genomicstart + querylength5 + querylength3;
+ nbingo = nbest = 0;
+ best_nmismatches = querylength3;
+ for (j = 0; j < Substring_nambcoords(substring3); j++) {
+ genomicstart = start_ambcoords[j] + start_amb_length_3;
+ debug9(printf(" %u",(Chrpos_T) (genomicstart - hit3->chroffset)));
+ if (genomicstart < hit5->genomicend /*allow overlap*/+ querylength5) {
+ /* Look for valid insertlength */
+ insertlength = hit5->genomicend - genomicstart + querylength5 + querylength3;
debug9(printf(" (%u)",insertlength));
- if (insertlength < expected_pairlength) {
- if (expected_pairlength - insertlength <= pairlength_deviation) {
- nbingo++;
- bingoi3 = j;
- debug9(printf("*"));
- }
- } else {
- if (insertlength - expected_pairlength <= pairlength_deviation) {
- nbingo++;
- bingoi3 = j;
- debug9(printf("*"));
- }
+ if (insertlength >= expected_pairlength_low && insertlength <= expected_pairlength_high) {
+ nbingo++;
+ bingoi3 = j;
+ debug9(printf("*"));
}
-#endif
- if ((nmismatches = (*hit3)->start_amb_nmismatches[j]) < best_nmismatches) {
+ if ((nmismatches = start_amb_nmismatches[j]) < best_nmismatches) {
best_nmismatches = nmismatches;
besti3 = j;
nbest = 1;
@@ -11926,438 +12139,30 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, T *hit5, T *h
}
}
-#if 0
- /* No longer holds for GMAP */
- assert((*hit5)->end_amb_length == 0);
- assert((*hit3)->start_amb_length > 0);
-#endif
-
-#ifdef USE_BINGO
if (nbingo == 1) {
- new3p = true;
- } else if (nbounded == 1) {
- new3p = true; bingoi3 = boundedi3;
- }
-#endif
+ debug9(printf("\nnbingo is 1\n"));
+ *amb_resolve_3 = bingoi3;
+ *amb_status_inside = AMB_RESOLVED_BYLENGTH;
- if (nbest == 0) {
+ } else if (nbest == 0) {
debug9(printf("\nnbest is zero: amb_length = %d...%d",
- (*hit5)->end_amb_length,(*hit3)->start_amb_length));
- *unresolved_amb_length = (*hit3)->start_amb_length;
+ end_amb_length(hit5),start_amb_length(hit3)));
+ *unresolved_amb_length = start_amb_length_3;
+ *amb_status_inside = AMB_UNRESOLVED_TOOCLOSE;
+
} else if (nbest == 1) {
debug9(printf("\nnbest is 1, with nmismatches %d\n",best_nmismatches));
- new3p = true; bingoi3 = besti3;
- }
- debug9(printf("\n"));
- }
-
- if (new5p == false) {
- /* Skip */
+ *amb_resolve_3 = besti3;
+ *amb_status_inside = AMB_RESOLVED_BYMATCHES;
- } else if ((*hit5)->hittype == ONE_THIRD_SHORTEXON || (*hit5)->hittype == TWO_THIRDS_SHORTEXON) {
- if ((*hit5)->sensedir == SENSE_FORWARD) {
- /* End 3 */
- shortexon = (*hit5)->substring1;
-
- donor_splicecoord = Substring_splicecoord_D(shortexon);
- /* donor_knowni = Substring_splicesites_knowni_D(shortexon); */
- splice_pos = Substring_chimera_pos_D(shortexon);
- acceptor_splicecoord = (*hit5)->ambcoords_acceptor[bingoi5];
- acceptor_knowni = (*hit5)->amb_knowni_acceptor[bingoi5];
- nmismatches_shortend = (*hit5)->amb_nmismatches_acceptor[bingoi5];
- prob_shortend = (*hit5)->amb_probs_acceptor[bingoi5];
- segment_left = acceptor_splicecoord - (querylength5 - splice_pos);
-
- if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,
- querylength5 - splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query5_compress_rev,
- querylength5,/*plusp*/false,genestrand,/*first_read_p*/true,/*sensep*/true,
- Substring_chrnum(shortexon),Substring_chroffset(shortexon),
- Substring_chrhigh(shortexon),Substring_chrlength(shortexon))) != NULL) {
- debug9(printf("Resolved shortexon, End 3: Splice from donor #%d to acceptor #%d, with nmismatches %d\n",
- donor_splicecoord - Substring_chroffset(shortexon),
- acceptor_splicecoord - Substring_chroffset(shortexon),nmismatches_shortend));
- old = *hit5;
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_from_array(old->ambcoords_donor,old->nambcoords_donor);
-#else
- ambcoords = Uintlist_from_array(old->ambcoords_donor,old->nambcoords_donor);
-#endif
- amb_knowni = Intlist_from_array(old->amb_knowni_donor,old->nambcoords_donor);
- amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
- amb_probs = Doublelist_from_array(old->amb_probs_donor,old->nambcoords_donor);
-
- *hit5 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
- old->amb_length_donor,/*amb_length_acceptor*/0,
- /*amb_prob_donor*/Doublelist_max(amb_probs),/*amb_prob_acceptor*/0.0,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- localsplicing_penalty,querylength5,/*sensedir*/SENSE_FORWARD,
- /*sarrayp*/false);
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
-#else
- Uintlist_free(&ambcoords);
-#endif
-
- if (*private5p == true) {
- Stage3end_free(&old);
- }
- *private5p = true;
- }
-
- } else if ((*hit5)->sensedir == SENSE_ANTI) {
- /* End 8 */
- shortexon = (*hit5)->substring1;
-
- acceptor_splicecoord = Substring_splicecoord_A(shortexon);
- /* acceptor_knowni = Substring_splicesites_knowni_A(shortexon); */
- splice_pos = Substring_chimera_pos_A(shortexon);
- donor_splicecoord = (*hit5)->ambcoords_donor[bingoi5];
- donor_knowni = (*hit5)->amb_knowni_donor[bingoi5];
- nmismatches_shortend = (*hit5)->amb_nmismatches_donor[bingoi5];
- prob_shortend = (*hit5)->amb_probs_donor[bingoi5];
- segment_left = donor_splicecoord - (querylength5 - splice_pos);
-
- if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,querylength5 - splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query5_compress_rev,
- querylength5,/*plusp*/false,genestrand,/*first_read_p*/true,/*sensep*/false,
- Substring_chrnum(shortexon),Substring_chroffset(shortexon),
- Substring_chrhigh(shortexon),Substring_chrlength(shortexon))) != NULL) {
- debug9(printf("Resolved shortexon, End 8: Splice from antiacceptor #%d to antidonor #%d, with nmismatches_shortend %d\n",
- acceptor_splicecoord - Substring_chroffset(shortexon),
- donor_splicecoord - Substring_chroffset(shortexon),nmismatches_shortend));
- old = *hit5;
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
-#else
- ambcoords = Uintlist_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
-#endif
- amb_knowni = Intlist_from_array(old->amb_knowni_acceptor,old->nambcoords_acceptor);
- amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
- amb_probs = Doublelist_from_array(old->amb_probs_acceptor,old->nambcoords_acceptor);
-
- *hit5 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
- /*amb_length_donor*/0,old->amb_length_acceptor,
- /*amb_prob_donor*/0.0,/*amb_prob_acceptor*/Doublelist_max(amb_probs),
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- localsplicing_penalty,querylength5,/*sensedir*/SENSE_ANTI,
- /*sarrayp*/false);
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
-#else
- Uintlist_free(&ambcoords);
-#endif
-
- if (*private5p == true) {
- Stage3end_free(&old);
- }
- *private5p = true;
- }
-
} else {
- fprintf(stderr,"Shortexon hit5 has no sensedir\n");
- abort();
+ *amb_resolve_3 = -1;
+ *amb_status_inside = AMB_UNRESOLVED_MULTIPLE;
}
-
- } else if ((*hit5)->hittype == HALFSPLICE_DONOR) {
- /* End 3 */
- assert((*hit5)->sensedir == SENSE_FORWARD);
- donor = (*hit5)->substring_donor;
-
- donor_splicecoord = Substring_splicecoord(donor);
- /* donor_knowni = Substring_splicesites_knowni(donor); */
- splice_pos = Substring_chimera_pos(donor);
- acceptor_splicecoord = (*hit5)->ambcoords_acceptor[bingoi5];
- acceptor_knowni = (*hit5)->amb_knowni_acceptor[bingoi5];
- nmismatches_shortend = (*hit5)->amb_nmismatches_acceptor[bingoi5];
- prob_shortend = (*hit5)->amb_probs_acceptor[bingoi5];
- segment_left = acceptor_splicecoord - (querylength5 - splice_pos);
-
- if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,querylength5 - splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query5_compress_rev,
- querylength5,/*plusp*/false,genestrand,/*first_read_p*/true,/*sensep*/true,
- Substring_chrnum(donor),Substring_chroffset(donor),
- Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
- debug9(printf("Resolved halfsplice, End 3: Splice from donor #%d to acceptor #%d, with nmismatches %d\n",
- Substring_splicecoord(donor) - Substring_chroffset(donor),
- Substring_splicecoord(acceptor) - Substring_chroffset(acceptor),nmismatches_shortend));
- old = *hit5;
- *hit5 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
- donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_length*/0,/*amb_prob*/0.0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/true,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false);
- if (*private5p == true) {
- Stage3end_free(&old);
- }
- *private5p = true;
- }
-
- } else if ((*hit5)->hittype == HALFSPLICE_ACCEPTOR) {
- /* End 8 */
- assert((*hit5)->sensedir == SENSE_ANTI);
- acceptor = (*hit5)->substring_acceptor;
-
- acceptor_splicecoord = Substring_splicecoord(acceptor);
- /* acceptor_knowni = Substring_splicesites_knowni(acceptor); */
- splice_pos = Substring_chimera_pos(acceptor);
- donor_splicecoord = (*hit5)->ambcoords_donor[bingoi5];
- donor_knowni = (*hit5)->amb_knowni_donor[bingoi5];
- nmismatches_shortend = (*hit5)->amb_nmismatches_donor[bingoi5];
- prob_shortend = (*hit5)->amb_probs_donor[bingoi5];
- segment_left = donor_splicecoord - (querylength5 - splice_pos);
-
- /* BUG HERE */
- if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,querylength5 - splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query5_compress_rev,
- querylength5,/*plusp*/false,genestrand,/*first_read_p*/true,/*sensep*/false,
- Substring_chrnum(acceptor),Substring_chroffset(acceptor),
- Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
- debug9(printf("Resolved halfsplice acceptor, End 8: Splice from antiacceptor #%d to antidonor #%d, with nmismatches %d\n",
- Substring_splicecoord(acceptor) - Substring_chroffset(acceptor),
- Substring_splicecoord(donor) - Substring_chroffset(donor),nmismatches_shortend));
- old = *hit5;
- *hit5 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
- donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength5,/*amb_length*/0,/*amb_prob*/0.0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/true,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false);
- if (*private5p == true) {
- Stage3end_free(&old);
- }
- *private5p = true;
- }
-
- } else {
- fprintf(stderr,"Unexpected hittype %d for ambiguous end\n",(*hit5)->hittype);
- abort();
+ debug9(printf("\n"));
}
- if (new3p == false) {
- /* Skip */
-
- } else if ((*hit3)->hittype == ONE_THIRD_SHORTEXON || (*hit3)->hittype == TWO_THIRDS_SHORTEXON) {
- if ((*hit3)->sensedir == SENSE_ANTI) {
- /* End 7 */
- shortexon = (*hit3)->substring1;
-
- donor_splicecoord = Substring_splicecoord_D(shortexon);
- /* donor_knowni = Substring_splicesites_knowni_D(shortexon); */
- splice_pos = Substring_chimera_pos_D(shortexon);
- acceptor_splicecoord = (*hit3)->ambcoords_acceptor[bingoi3];
- acceptor_knowni = (*hit3)->amb_knowni_acceptor[bingoi3];
- nmismatches_shortend = (*hit3)->amb_nmismatches_acceptor[bingoi3];
- prob_shortend = (*hit3)->amb_probs_acceptor[bingoi3];
- segment_left = acceptor_splicecoord - (querylength3 - splice_pos);
-
- if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,querylength3 - splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query3_compress_rev,
- querylength3,/*plusp*/false,genestrand,/*first_read_p*/false,/*sensep*/false,
- Substring_chrnum(shortexon),Substring_chroffset(shortexon),
- Substring_chrhigh(shortexon),Substring_chrlength(shortexon))) != NULL) {
- debug9(printf("Resolved shortexon, End 7: Splice from antidonor #%d to antiacceptor #%d, with nmismatches %d\n",
- donor_splicecoord - Substring_chroffset(shortexon),
- acceptor_splicecoord - Substring_chroffset(shortexon),nmismatches_shortend));
- old = *hit3;
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_from_array(old->ambcoords_donor,old->nambcoords_donor);
-#else
- ambcoords = Uintlist_from_array(old->ambcoords_donor,old->nambcoords_donor);
-#endif
- amb_knowni = Intlist_from_array(old->amb_knowni_donor,old->nambcoords_donor);
- amb_nmismatches = Intlist_from_array(old->amb_nmismatches_donor,old->nambcoords_donor);
- amb_probs = Doublelist_from_array(old->amb_probs_donor,old->nambcoords_donor);
-
- *hit3 = Stage3end_new_shortexon(&ignore_found_score,/*donor*/old->substringD,acceptor,shortexon,
- old->amb_length_donor,/*amb_length_acceptor*/0,
- /*amb_prob_donor*/Doublelist_max(amb_probs),/*amb_prob_acceptor*/0.0,
- ambcoords,/*ambcoords_acceptor*/NULL,
- amb_knowni,/*amb_knowni_acceptor*/NULL,
- amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
- amb_probs,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*copy_shortexon_p*/true,
- localsplicing_penalty,querylength3,/*sensedir*/SENSE_ANTI,
- /*sarrayp*/false);
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
-#else
- Uintlist_free(&ambcoords);
-#endif
-
- if (*private3p == true) {
- Stage3end_free(&old);
- }
- *private3p = true;
- }
-
- } else if ((*hit3)->sensedir == SENSE_FORWARD) {
- /* End 4 */
- shortexon = (*hit3)->substring1;
-
- acceptor_splicecoord = Substring_splicecoord_A(shortexon);
- /* acceptor_knowni = Substring_splicesites_knowni_A(shortexon); */
- splice_pos = Substring_chimera_pos_A(shortexon);
- donor_splicecoord = (*hit3)->ambcoords_donor[bingoi3];
- donor_knowni = (*hit3)->amb_knowni_donor[bingoi3];
- nmismatches_shortend = (*hit3)->amb_nmismatches_donor[bingoi3];
- prob_shortend = (*hit3)->amb_probs_donor[bingoi3];
- segment_left = donor_splicecoord - (querylength3 - splice_pos);
-
- if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,querylength3 - splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query3_compress_rev,
- querylength3,/*plusp*/false,genestrand,/*first_read_p*/false,/*sensep*/true,
- Substring_chrnum(shortexon),Substring_chroffset(shortexon),
- Substring_chrhigh(shortexon),Substring_chrlength(shortexon))) != NULL) {
- debug9(printf("Resolved halfsplice_acceptor, End 4: Splice from acceptor #%d to #%d, with nmismatches %d\n",
- acceptor_splicecoord - Substring_chroffset(shortexon),
- donor_splicecoord - Substring_chroffset(shortexon),nmismatches_shortend));
- old = *hit3;
-#ifdef LARGE_GENOMES
- ambcoords = Uint8list_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
-#else
- ambcoords = Uintlist_from_array(old->ambcoords_acceptor,old->nambcoords_acceptor);
-#endif
- amb_knowni = Intlist_from_array(old->amb_knowni_acceptor,old->nambcoords_acceptor);
- amb_nmismatches = Intlist_from_array(old->amb_nmismatches_acceptor,old->nambcoords_acceptor);
- amb_probs = Doublelist_from_array(old->amb_probs_acceptor,old->nambcoords_acceptor);
-
- *hit3 = Stage3end_new_shortexon(&ignore_found_score,donor,/*acceptor*/old->substringA,shortexon,
- /*amb_length_donor*/0,old->amb_length_acceptor,
- /*amb_prob_donor*/0.0,/*amb_prob_acceptor*/Doublelist_max(amb_probs),
- /*ambcoords_donor*/NULL,ambcoords,
- /*amb_knowni_donor*/NULL,amb_knowni,
- /*amb_nmismatches_donor*/NULL,amb_nmismatches,
- /*amb_probs_donor*/NULL,amb_probs,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*copy_shortexon_p*/true,
- localsplicing_penalty,querylength3,/*sensedir*/SENSE_FORWARD,
- /*sarrayp*/false);
- Doublelist_free(&amb_probs);
- Intlist_free(&amb_nmismatches);
- Intlist_free(&amb_knowni);
-#ifdef LARGE_GENOMES
- Uint8list_free(&ambcoords);
-#else
- Uintlist_free(&ambcoords);
-#endif
-
- if (*private3p == true) {
- Stage3end_free(&old);
- }
- *private3p = true;
- }
-
- } else {
- fprintf(stderr,"Shortexon hit3 has no sensedir\n");
- abort();
- }
-
- } else if ((*hit3)->hittype == HALFSPLICE_DONOR) {
- /* End 7 */
- assert((*hit3)->sensedir == SENSE_ANTI);
- donor = (*hit3)->substring_donor;
-
- donor_splicecoord = Substring_splicecoord(donor);
- /* donor_knowni = Substring_splicesites_knowni(donor); */
- splice_pos = Substring_chimera_pos(donor);
- acceptor_splicecoord = (*hit3)->ambcoords_acceptor[bingoi3];
- acceptor_knowni = (*hit3)->amb_knowni_acceptor[bingoi3];
- nmismatches_shortend = (*hit3)->amb_nmismatches_acceptor[bingoi3];
- prob_shortend = (*hit3)->amb_probs_acceptor[bingoi3];
- segment_left = acceptor_splicecoord - (querylength3 - splice_pos);
-
- if ((acceptor = Substring_new_acceptor(acceptor_splicecoord,acceptor_knowni,querylength3 - splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query3_compress_rev,
- querylength3,/*plusp*/false,genestrand,/*first_read_p*/false,/*sensep*/false,
- Substring_chrnum(donor),Substring_chroffset(donor),
- Substring_chrhigh(donor),Substring_chrlength(donor))) != NULL) {
- debug9(printf("Resolved halfsplice_donor, End 7: Splice from antidonor #%d to antiacceptor #%d, with nmismatches %d\n",
- Substring_splicecoord(donor) - Substring_chroffset(donor),
- Substring_splicecoord(acceptor) - Substring_chroffset(acceptor),nmismatches_shortend));
- old = *hit3;
- *hit3 = Stage3end_new_splice(&ignore_found_score,Substring_nmismatches_whole(donor),/*nmismatches_acceptor*/nmismatches_shortend,
- donor,acceptor,/*distance*/acceptor_splicecoord - donor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_length*/0,/*amb_prob*/0.0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/true,/*copy_acceptor_p*/false,/*first_read_p*/false,
- /*sensedir*/SENSE_ANTI,/*sarrayp*/false);
- if (*private3p == true) {
- Stage3end_free(&old);
- }
- *private3p = true;
- }
-
- } else if ((*hit3)->hittype == HALFSPLICE_ACCEPTOR) {
- /* End 4 */
- assert((*hit3)->sensedir == SENSE_FORWARD);
- acceptor = (*hit3)->substring_acceptor;
-
- acceptor_splicecoord = Substring_splicecoord(acceptor);
- /* acceptor_knowni = Substring_splicesites_knowni(acceptor); */
- splice_pos = Substring_chimera_pos(acceptor);
- donor_splicecoord = (*hit3)->ambcoords_donor[bingoi3];
- donor_knowni = (*hit3)->amb_knowni_donor[bingoi3];
- nmismatches_shortend = (*hit3)->amb_nmismatches_donor[bingoi3];
- prob_shortend = (*hit3)->amb_probs_donor[bingoi3];
- segment_left = donor_splicecoord - (querylength3 - splice_pos);
-
- if ((donor = Substring_new_donor(donor_splicecoord,donor_knowni,querylength3 - splice_pos,nmismatches_shortend,
- /*prob*/prob_shortend,segment_left,query3_compress_rev,
- querylength3,/*plusp*/false,genestrand,/*first_read_p*/false,/*sensep*/true,
- Substring_chrnum(acceptor),Substring_chroffset(acceptor),
- Substring_chrhigh(acceptor),Substring_chrlength(acceptor))) != NULL) {
- debug9(printf("Resolved halfsplice_acceptor, End 4: Splice from acceptor #%d to #%d, with nmismatches %d\n",
- Substring_splicecoord(acceptor) - Substring_chroffset(acceptor),
- Substring_splicecoord(donor) - Substring_chroffset(acceptor),nmismatches_shortend));
- old = *hit3;
- *hit3 = Stage3end_new_splice(&ignore_found_score,/*nmismatches_donor*/nmismatches_shortend,Substring_nmismatches_whole(acceptor),
- donor,acceptor,/*distance*/donor_splicecoord - acceptor_splicecoord,
- /*shortdistancep*/true,localsplicing_penalty,querylength3,/*amb_length*/0,/*amb_prob*/0.0,
- /*ambcoords_donor*/NULL,/*ambcoords_acceptor*/NULL,
- /*amb_knowni_donor*/NULL,/*amb_knowni_acceptor*/NULL,
- /*amb_nmismatches_donor*/NULL,/*amb_nmismatches_acceptor*/NULL,
- /*amb_probs_donor*/NULL,/*amb_probs_acceptor*/NULL,
- /*copy_donor_p*/false,/*copy_acceptor_p*/true,/*first_read_p*/false,
- /*sensedir*/SENSE_FORWARD,/*sarrayp*/false);
- if (*private3p == true) {
- Stage3end_free(&old);
- }
- *private3p = true;
- }
-
- } else {
- fprintf(stderr,"Unexpected hittype %d for ambiguous end\n",(*hit3)->hittype);
- abort();
- }
-
return;
}
@@ -12366,15 +12171,18 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, T *hit5, T *h
static void
alias_circular (T hit) {
Chrpos_T chrlength = hit->chrlength;
+ List_T p;
+ Substring_T substring;
assert(hit->alias == -1);
if (hit->hittype == GMAP) {
Pair_alias_circular(hit->pairarray,hit->npairs,chrlength);
} else {
- Substring_alias_circular(hit->substring0);
- Substring_alias_circular(hit->substring1);
- Substring_alias_circular(hit->substring2);
+ for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
+ substring = (Substring_T) List_head(p);
+ Substring_alias_circular(substring);
+ }
}
/* Doesn't fix hitpair->low and hitpair->high */
@@ -12389,6 +12197,149 @@ alias_circular (T hit) {
}
+static int
+compute_insertlength (Stage3pair_T this) {
+ T hit5, hit3;
+ Chrpos_T chrstart, chrend, chrpos;
+ int querypos;
+ int querylength5, querylength3;
+ /* Returns the insert length for the pair: when the ends do not overlap, a coordinate difference between
+    hit5 and hit3 plus both query lengths; otherwise the value comes from the overlap/pair helpers below. */
+ hit5 = this->hit5;
+ hit3 = this->hit3;
+ querylength5 = hit5->querylength;
+ querylength3 = hit3->querylength;
+
+ if (hit5->hittype == GMAP && hit3->hittype == GMAP) {
+ debug10(printf("Got hit5 and hit3 both of type GMAP\n"));
+
+ /* Do not try to resolve ambiguity on inside of concordant ends */
+ if (hit5->plusp == true && hit3->plusp == true) {
+ return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+ } else if (hit5->plusp == false && hit3->plusp == false) {
+ return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+ } else {
+ return pair_insert_length_unpaired(hit5,hit3);
+ }
+
+ } else if (hit5->hittype == GMAP) {
+ debug10(printf("Got hit5 of type GMAP\n"));
+ if (hit5->plusp == true && hit3->plusp == true) {
+ /* Have 5-start..end and 3-start..end */
+ debug10(printf("plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+ hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
+
+ if (hit5->genomicend <= hit3->genomicstart) {
+ /* No overlap */
+ return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+ } else if ((chrpos = overlap3_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) {
+ return /* end3 */ chrend - /* start5 */ (chrpos - querypos);
+ } else {
+ /* Still no overlap.  NOTE(review): hit5->genomicend > hit3->genomicstart here, so the difference below is negative (or wraps if the coordinate type is unsigned) -- confirm intended; same pattern in the other "Still no overlap" branches */
+ return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+ }
+
+ } else if (hit5->plusp == false && hit3->plusp == false) {
+ /* Have 3-end..start and 5-end..start */
+ debug10(printf("minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+ hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
+
+ if (hit3->genomicstart <= hit5->genomicend) { /* No overlap */
+ return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+ } else if ((chrpos = overlap3_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) {
+ return /* start5 */ (chrpos + querypos) - /* end3 */ chrend + 1;
+ } else {
+ /* Still no overlap */
+ return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+ }
+ } else {
+ return pair_insert_length_unpaired(hit5,hit3);
+ }
+
+ } else if (hit3->hittype == GMAP) {
+ debug10(printf("Got hit3 of type GMAP\n"));
+ if (hit5->plusp == true && hit3->plusp == true) {
+ /* Have 5-start..end and 3-start..end */
+ debug10(printf("plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+ hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
+
+ if (hit5->genomicend <= hit3->genomicstart) {
+ /* No overlap */
+ return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+ } else if ((chrpos = overlap5_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
+ return /* end3 */ (chrpos - querypos + querylength3) - /* start5 */ chrstart;
+ } else {
+ /* Still no overlap */
+ return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+ }
+
+ } else if (hit5->plusp == false && hit3->plusp == false) {
+ /* Have 3-end..start and 5-end..start */
+ debug10(printf("minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+ hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
+ if (hit3->genomicstart <= hit5->genomicend) {
+ /* No overlap */
+ return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+ } else if ((chrpos = overlap5_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
+ return /* start5 */ chrstart - /* end3 */ (chrpos + querypos - querylength3) - 1;
+ } else {
+ /* Still no overlap */
+ return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+ }
+ } else {
+ return pair_insert_length_unpaired(hit5,hit3);
+ }
+
+ } else if (hit5->plusp == true && hit3->plusp == false) {
+ /* Have 5-start..end and 3-end..start */
+ /* or 3-end..start and 5-start..end */
+ /* Neither end is GMAP here; hit5 is plus and hit3 is minus */
+ if (hit5->genomicend < hit3->genomicend) {
+ return (hit3->genomicend - hit5->genomicend) + querylength5 + querylength3;
+ } else if (hit3->genomicstart < hit5->genomicstart) {
+ return (hit5->genomicstart - hit3->genomicstart) + querylength5 + querylength3;
+ } else {
+ return pair_insert_length_unpaired(hit5,hit3);
+ }
+
+ } else if (hit5->plusp == false && hit3->plusp == true) {
+ /* Have 5-end..start and 3-start..end */
+ /* or 3-start..end and 5-end..start */
+ /* Neither end is GMAP here; hit5 is minus and hit3 is plus */
+ if (hit5->genomicstart < hit3->genomicstart) {
+ return (hit3->genomicstart - hit5->genomicstart) + querylength5 + querylength3;
+ } else if (hit3->genomicend < hit5->genomicend) {
+ return (hit5->genomicend - hit3->genomicend) + querylength5 + querylength3;
+ } else {
+ return pair_insert_length_unpaired(hit5,hit3);
+ }
+
+ } else if (hit5->plusp == true) {
+ /* Concordant directions on same chromosome (plus) */
+ debug10(printf("Concordant on plus strand\n"));
+ /* Have 5-start..end and 3-start..end */
+ if (hit5->genomicend < hit3->genomicstart) {
+ /* No overlap */
+ return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+ } else {
+ return pair_insert_length(hit5,hit3);
+ }
+
+
+ } else {
+ /* Concordant directions on same chromosome (minus) */
+ debug10(printf("Concordant on minus strand\n"));
+ /* Have 3-end..start and 5-end..start */
+ if (hit3->genomicstart < hit5->genomicend) {
+ /* No overlap */
+ return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+ } else {
+ return pair_insert_length(hit5,hit3);
+ }
+ }
+}
+
+
Stage3pair_T
Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
@@ -12398,14 +12349,15 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
bool private5p, bool private3p, bool expect_concordant_p) {
Stage3pair_T new;
Stage3end_T copy;
+ Substring_T substring1, substringN;
Chrpos_T chrstart, chrend, chrpos;
int querypos;
int unresolved_amb_length = 0;
int found_score = 0;
bool overreach5p, overreach3p;
- int querylength5 = hit5->querylength_adj;
- int querylength3 = hit3->querylength_adj;
+ int querylength5 = hit5->querylength;
+ int querylength3 = hit3->querylength;
debug10(printf("\nStage3pair_new called with pairtype %s and chrnum %d, %d (effective %d, %d)\n",
Pairtype_string(pairtype),hit5->chrnum,hit3->chrnum,hit5->effective_chrnum,hit3->effective_chrnum));
@@ -12418,6 +12370,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (private3p == true) {
Stage3end_free(&hit3);
}
+ debug5(printf("Rejecting terminal as NULL because hit5 trim %d+%d > reject_trimlength %d\n",hit5->trim_left,hit5->trim_right,reject_trimlength));
return (Stage3pair_T) NULL;
} else if (hit3->hittype == TERMINAL && hit3->trim_left + hit3->trim_right >= reject_trimlength) {
@@ -12428,6 +12381,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (private3p == true) {
Stage3end_free(&hit3);
}
+ debug5(printf("Rejecting terminal as NULL because hit3 trim %d+%d > reject_trimlength %d\n",hit3->trim_left,hit3->trim_right,reject_trimlength));
return (Stage3pair_T) NULL;
} else {
new = (Stage3pair_T) MALLOC_OUT(sizeof(*new));
@@ -12443,6 +12397,10 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
}
new->pairtype = pairtype;
new->genestrand = genestrand;
+ new->amb_resolve_5 = -1;
+ new->amb_resolve_3 = -1;
+ new->amb_status_inside = AMB_NOT_AMBIGUOUS;
+
#if 0
new->mapq_loglik = hit5->mapq_loglik + hit3->mapq_loglik;
@@ -12458,19 +12416,19 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
new->dir = +1;
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, no overlap: insert length %d = start3 %llu - end5 %llu + %d + %d\n",
- new->insertlength,(unsigned long long) hit3->genomicstart,
- (unsigned long long) hit5->genomicend,querylength5,querylength3));
+ debug10(printf("plus, no overlap: insert length %d = start3 %u - end5 %u + %d + %d\n",
+ new->insertlength,hit3->genomicstart - hit3->chroffset,
+ hit5->genomicend - hit5->chroffset,querylength5,querylength3));
} else if (hit5->plusp == false && hit3->plusp == false) {
new->dir = -1;
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, no overlap: insert length %d = end5 %llu - start3 %llu + %d + %d\n",
- new->insertlength,(unsigned long long) hit5->genomicend,
- (unsigned long long) hit3->genomicstart,querylength5,querylength3));
+ debug10(printf("minus, no overlap: insert length %d = end5 %u - start3 %u + %d + %d\n",
+ new->insertlength,hit5->genomicend - hit5->chroffset,
+ hit3->genomicstart - hit3->chroffset,querylength5,querylength3));
} else {
new->dir = 0;
- new->insertlength = pair_insert_length_unpaired(hit5,hit3); /* was 0 */
+ new->insertlength = pair_insert_length_unpaired(hit5,hit3);
new->insertlength_expected_sign = false;
}
@@ -12481,31 +12439,45 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_plus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_plus(&unresolved_amb_length,&new->amb_resolve_5,&new->amb_resolve_3,
+ &new->amb_status_inside,hit5,hit3,
splicesites,query5_compress_fwd,query3_compress_fwd,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
/* Have 5-start..end and 3-start..end */
- debug10(printf("plus: comparing hit5->genomicend %llu <= hit3->genomicstart %llu\n",
- (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicstart));
+ debug10(printf("plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+ hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
if (hit5->genomicend <= hit3->genomicstart) {
/* No overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, no overlap: insert length %d = start3 %llu - end5 %llu + %d + %d\n",
- new->insertlength,(unsigned long long) hit3->genomicstart,
- (unsigned long long) hit5->genomicend,querylength5,querylength3));
+ debug10(printf("plus, no overlap: insert length %d = start3 %u - end5 %u + %d + %d\n",
+ new->insertlength,hit3->genomicstart - hit3->chroffset,
+ hit5->genomicend - hit5->chroffset,querylength5,querylength3));
} else if ((chrpos = overlap3_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) {
new->insertlength = /* end3 */ chrend - /* start5 */ (chrpos - querypos);
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, overlap: insert length %d = end3 %llu - start5 (%llu - %d)\n",
- new->insertlength,(unsigned long long) chrend,(unsigned long long) chrpos,querypos));
+ debug10(printf("plus, overlap: insert length %d = end3 %u - start5 (%u - %d)\n",
+ new->insertlength,chrend,chrpos,querypos));
} else {
/* Still no overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
+
+ if (new->insertlength <= 0) {
+ /* Overreach */
+ debug5(printf(" Returning NULL because of overreach\n"));
+ if (private5p == true) {
+ Stage3end_free(&hit5);
+ }
+ if (private3p == true) {
+ Stage3end_free(&hit3);
+ }
+ FREE_OUT(new);
+ return (Stage3pair_T) NULL;
+ }
}
} else if (hit5->plusp == false && hit3->plusp == false) {
@@ -12513,32 +12485,47 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_minus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_minus(&unresolved_amb_length,&new->amb_resolve_5,&new->amb_resolve_3,
+ &new->amb_status_inside,hit5,hit3,
splicesites,query5_compress_rev,query3_compress_rev,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
/* Have 3-end..start and 5-end..start */
- debug10(printf("minus: comparing hit3->genomicstart %llu <= hit5->genomicend %llu\n",
- (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicend));
+ debug10(printf("minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+ hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
if (hit3->genomicstart <= hit5->genomicend) {
/* No overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, no overlap: insert length %d = end5 %llu - start3 %llu + %d + %d\n",
- new->insertlength,(unsigned long long) hit5->genomicend,
- (unsigned long long) hit3->genomicstart,querylength5,querylength3));
+ debug10(printf("minus, no overlap: insert length %d = end5 %u - start3 %u + %d + %d\n",
+ new->insertlength,hit5->genomicend - hit5->chroffset,
+ hit3->genomicstart - hit3->chroffset,querylength5,querylength3));
} else if ((chrpos = overlap3_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) {
new->insertlength = /* start5 */ (chrpos + querypos) - /* end3 */ chrend + 1;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, overlap: insert length %d = start5 (%llu + %d) - end3 %llu + 1\n",
- new->insertlength,(unsigned long long) chrpos,querypos,(unsigned long long) chrend));
+ debug10(printf("minus, overlap: insert length %d = start5 (%u + %d) - end3 %u + 1\n",
+ new->insertlength,chrpos,querypos,chrend));
} else {
/* Still no overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
+
+ if (new->insertlength <= 0) {
+ /* Overreach */
+ debug5(printf(" Returning NULL because of overreach\n"));
+ if (private5p == true) {
+ Stage3end_free(&hit5);
+ }
+ if (private3p == true) {
+ Stage3end_free(&hit3);
+ }
+ FREE_OUT(new);
+ return (Stage3pair_T) NULL;
+ }
}
+
} else {
new->dir = 0;
new->insertlength = pair_insert_length_unpaired(hit5,hit3); /* was 0 */
@@ -12552,32 +12539,45 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_plus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_plus(&unresolved_amb_length,&new->amb_resolve_5,&new->amb_resolve_3,
+ &new->amb_status_inside,hit5,hit3,
splicesites,query5_compress_fwd,query3_compress_fwd,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
/* Have 5-start..end and 3-start..end */
- debug10(printf("plus: comparing hit5->genomicend %llu <= hit3->genomicstart %llu\n",
- (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicstart));
+ debug10(printf("plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+ hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
if (hit5->genomicend <= hit3->genomicstart) {
/* No overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, no overlap: insert length %d = start3 %llu - end5 %llu + %d + %d\n",
- new->insertlength,(unsigned long long) hit3->genomicstart,
- (unsigned long long) hit5->genomicend,querylength5,querylength3));
+ debug10(printf("plus, no overlap: insert length %d = start3 %u - end5 %u + %d + %d\n",
+ new->insertlength,hit3->genomicstart - hit3->chroffset,
+ hit5->genomicend - hit5->chroffset,querylength5,querylength3));
} else if ((chrpos = overlap5_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
new->insertlength = /* end3 */ (chrpos - querypos + querylength3) - /* start5 */ chrstart;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, overlap: insert length %d = end3 (%llu - %d + %d) - start5 %llu\n",
- new->insertlength,(unsigned long long) chrpos,querypos,querylength3,
- (unsigned long long) chrstart));
+ debug10(printf("plus, overlap: insert length %d = end3 (%u - %d + %d) - start5 %u\n",
+ new->insertlength,chrpos,querypos,querylength3,chrstart));
} else {
/* Still no overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
+
+ if (new->insertlength <= 0) {
+ /* Overreach */
+ debug5(printf(" Returning NULL because of overreach\n"));
+ if (private5p == true) {
+ Stage3end_free(&hit5);
+ }
+ if (private3p == true) {
+ Stage3end_free(&hit3);
+ }
+ FREE_OUT(new);
+ return (Stage3pair_T) NULL;
+ }
}
} else if (hit5->plusp == false && hit3->plusp == false) {
@@ -12585,31 +12585,44 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_minus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_minus(&unresolved_amb_length,&new->amb_resolve_5,&new->amb_resolve_3,
+ &new->amb_status_inside,hit5,hit3,
splicesites,query5_compress_rev,query3_compress_rev,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
/* Have 3-end..start and 5-end..start */
- debug10(printf("minus: comparing hit3->genomicstart %llu <= hit5->genomicend %llu\n",
- (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicend));
+ debug10(printf("minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+ hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
if (hit3->genomicstart <= hit5->genomicend) {
/* No overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, no overlap: insert length %d = end5 %llu - start3 %llu + %d + %d\n",
- new->insertlength,(unsigned long long) hit5->genomicend,
- (unsigned long long) hit3->genomicstart,querylength5,querylength3));
+ debug10(printf("minus, no overlap: insert length %d = end5 %u - start3 %u + %d + %d\n",
+ new->insertlength,hit5->genomicend - hit5->chroffset,
+ hit3->genomicstart - hit3->chroffset,querylength5,querylength3));
} else if ((chrpos = overlap5_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
new->insertlength = /* start5 */ chrstart - /* end3 */ (chrpos + querypos - querylength3) - 1;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, overlap: insert length %d = start5 %llu - end3 (%llu + %d - %d) - 1\n",
- new->insertlength,(unsigned long long) chrstart,(unsigned long long) chrpos,
- querypos,querylength3));
+ debug10(printf("minus, overlap: insert length %d = start5 %u - end3 (%u + %d - %d) - 1\n",
+ new->insertlength,chrstart,chrpos,querypos,querylength3));
} else {
/* Still no overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
+
+ if (new->insertlength <= 0) {
+ /* Overreach */
+ debug5(printf(" Returning NULL because of overreach\n"));
+ if (private5p == true) {
+ Stage3end_free(&hit5);
+ }
+ if (private3p == true) {
+ Stage3end_free(&hit3);
+ }
+ FREE_OUT(new);
+ return (Stage3pair_T) NULL;
+ }
}
} else {
new->dir = 0;
@@ -12659,15 +12672,20 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
overreach5p = overreach3p = false;
if (hit5->hittype == SPLICE) {
- if (Substring_alignstart(hit5->substring2) > hit3->genomicend) {
- if (Substring_alignend(hit5->substring1) < hit3->genomicstart) {
+
+ substringN = (Substring_T) List_head(hit5->substrings_Nto1);
+ if (Substring_alignstart(substringN) > hit3->genomicend) {
+ substring1 = (Substring_T) List_head(hit5->substrings_1toN);
+ if (Substring_alignend(substring1) < hit3->genomicstart) {
overreach5p = true;
}
}
}
if (hit3->hittype == SPLICE) {
- if (Substring_alignend(hit3->substring1) < hit5->genomicstart) {
- if (Substring_alignstart(hit3->substring2) > hit5->genomicend) {
+ substring1 = (Substring_T) List_head(hit3->substrings_1toN);
+ if (Substring_alignend(substring1) < hit5->genomicstart) {
+ substringN = (Substring_T) List_head(hit3->substrings_Nto1);
+ if (Substring_alignstart(substringN) > hit5->genomicend) {
overreach3p = true;
}
}
@@ -12753,7 +12771,8 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
}
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_plus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_plus(&unresolved_amb_length,&new->amb_resolve_5,&new->amb_resolve_3,
+ &new->amb_status_inside,hit5,hit3,
splicesites,query5_compress_fwd,query3_compress_fwd,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
@@ -12763,14 +12782,14 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
/* No overlap */
new->insertlength = (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("plus, no overlap: insert length %d = start3 %llu - end5 %llu + %d + %d\n",
- new->insertlength,(unsigned long long) hit3->genomicstart,
- (unsigned long long) hit5->genomicend,querylength5,querylength3));
+ debug10(printf("plus, no overlap: insert length %d = start3 %u - end5 %u + %d + %d\n",
+ new->insertlength,hit3->genomicstart - hit3->chroffset,
+ hit5->genomicend - hit5->chroffset,querylength5,querylength3));
#if 0
} else if (hit5->genomicend > hit3->genomicend + SUBSUMPTION_SLOP) {
/* hit5 subsumes hit3 */
- debug10(printf("plus, subsumption %llu > %llu\n",
- (unsigned long long) hit5->genomicend,(unsigned long long) hit3->genomicend));
+ debug10(printf("plus, subsumption %u > %u\n",
+ hit5->genomicend - hit5->chroffset,hit3->genomicend - hit3->chroffset));
new->insertlength = 0;
new->insertlength_expected_sign = false;
#endif
@@ -12788,15 +12807,19 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
if (expect_concordant_p == true) {
overreach5p = overreach3p = false;
if (hit5->hittype == SPLICE) {
- if (Substring_alignstart(hit5->substring2) < hit3->genomicend) {
- if (Substring_alignend(hit5->substring1) > hit3->genomicstart) {
+ substringN = (Substring_T) List_head(hit5->substrings_Nto1);
+ if (Substring_alignstart(substringN) < hit3->genomicend) {
+ substring1 = (Substring_T) List_head(hit5->substrings_1toN);
+ if (Substring_alignend(substring1) > hit3->genomicstart) {
overreach5p = true;
}
}
}
if (hit3->hittype == SPLICE) {
- if (Substring_alignend(hit3->substring1) > hit5->genomicstart) {
- if (Substring_alignstart(hit3->substring2) < hit5->genomicend) {
+ substring1 = (Substring_T) List_head(hit3->substrings_1toN);
+ if (Substring_alignend(substring1) > hit5->genomicstart) {
+ substringN = (Substring_T) List_head(hit3->substrings_Nto1);
+ if (Substring_alignstart(substringN) < hit5->genomicend) {
overreach3p = true;
}
}
@@ -12882,7 +12905,8 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
}
/* Try to resolve ambiguity on inside of concordant ends */
- resolve_inside_ambiguous_splice_minus(&unresolved_amb_length,&hit5,&hit3,&private5p,&private3p,
+ resolve_inside_ambiguous_splice_minus(&unresolved_amb_length,&new->amb_resolve_5,&new->amb_resolve_3,
+ &new->amb_status_inside,hit5,hit3,
splicesites,query5_compress_rev,query3_compress_rev,
localsplicing_penalty,querylength5,querylength3,genestrand);
}
@@ -12892,14 +12916,14 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
/* No overlap */
new->insertlength = (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
- debug10(printf("minus, no overlap: insert length %d = end5 %llu - start3 %llu + %d + %d\n",
- new->insertlength,(unsigned long long) hit5->genomicend,
- (unsigned long long) hit3->genomicstart,querylength5,querylength3));
+ debug10(printf("minus, no overlap: insert length %d = end5 %u - start3 %u + %d + %d\n",
+ new->insertlength,hit5->genomicend - hit5->chroffset,
+ hit3->genomicstart - hit3->chroffset,querylength5,querylength3));
#if 0
} else if (hit3->genomicstart > hit5->genomicstart + SUBSUMPTION_SLOP) {
/* hit3 subsumes hit5 */
- debug10(printf("minus, subsumption %llu > %llu\n",
- (unsigned long long) hit3->genomicstart,(unsigned long long) hit5->genomicstart));
+ debug10(printf("minus, subsumption %u > %u\n",
+ hit3->genomicstart - hit3->chroffset,hit5->genomicstart - hit5->chroffset));
new->insertlength = 0;
new->insertlength_expected_sign = false;
#endif
@@ -12907,11 +12931,11 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
new->insertlength = pair_insert_length(hit5,hit3);
new->insertlength_expected_sign = insertlength_expected(new->insertlength);
}
-
}
-
- debug5(printf("\nGot insertlength of %d\n",new->insertlength));
+ debug5(printf("\nGot insertlength of %d, overreach5p %d, overreach3p %d\n",new->insertlength,overreach5p,overreach3p));
+ /* Was new->insertlength <= 0, but this eliminates legitimate overlaps */
+ /* Was new->insertlength < -pairmax, but this allows overreach */
if (new->insertlength <= 0) {
/* Not concordant */
#ifdef USE_BINGO
@@ -12922,7 +12946,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
#endif
if (expect_concordant_p == true) {
- debug5(printf(" Returning NULL\n"));
+ debug5(printf(" Returning NULL, because not concordant\n"));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -12934,7 +12958,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
}
} else if (new->insertlength > pairmax && expect_concordant_p == true) {
- debug5(printf(" Returning NULL\n"));
+ debug5(printf(" Returning NULL because insertlength %d > pairmax %d\n",new->insertlength,pairmax));
if (private5p == true) {
Stage3end_free(&hit5);
}
@@ -12971,9 +12995,9 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
/* Do not alter score, so the alignmnent terminates at the known splice site */
new->score = hit5->score + hit3->score /* + unresolved_amb_length */;
+
new->nmatches = hit5->nmatches + hit3->nmatches - unresolved_amb_length;
new->nmatches_posttrim = hit5->nmatches_posttrim + hit3->nmatches_posttrim;
- new->indel_low = hit5->indel_low + hit3->indel_low;
/* new->overlap_known_gene_p = false; -- initialized later when resolving multimappers */
new->tally = -1L;
@@ -12982,8 +13006,8 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
#if 0
if (new->low > new->high) {
- fprintf(stderr,"new->low %llu > new->high %llu, hit5->chrnum %d\n",
- (unsigned long long) new->low,(unsigned long long) new->high,hit5->chrnum);
+ fprintf(stderr,"new->low %u > new->high %u, hit5->chrnum %d\n",
+ new->low - new->chroffset,new->high - new->chroffset,hit5->chrnum);
abort();
}
#endif
@@ -12995,18 +13019,18 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
}
new->hit5 = hit5;
- new->private5p = private5p;
-
new->hit3 = hit3;
+
+ new->private5p = private5p;
new->private3p = private3p;
+
if (expect_concordant_p == true) {
hit5->paired_usedp = true;
hit3->paired_usedp = true;
}
- new->nchimera_known = hit5->nchimera_known + hit3->nchimera_known;
- new->nchimera_novel = hit5->nchimera_novel + hit3->nchimera_novel;
+ new->nsplices = hit5->nsplices + hit3->nsplices;
debug0(printf("Created new pair %p from %p and %p with private %d, %d\n",new,hit5,hit3,private5p,private3p));
debug0(printf(" hittypes %s and %s\n",hittype_string(hit5->hittype),hittype_string(hit3->hittype)));
@@ -13026,7 +13050,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
new->insertlength -= hit5->chrlength;
}
- if (hit5->alias > 0) {
+ if (hit5->alias > 0) {
debug0(printf("Unaliasing 5' end\n"));
if (private5p == false) {
new->hit5 = Stage3end_copy(hit5);
@@ -13044,6 +13068,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
unalias_circular(new->hit3);
}
+ /* assert((int) new->insertlength >= 0); */
return new;
}
@@ -13074,81 +13099,6 @@ Stage3pair_privatize (Stage3pair_T *array, int npairs) {
}
-
-#if 0
-static int
-chimera_match_distance_cmp (const void *a, const void *b) {
- T x = * (T *) a;
- T y = * (T *) b;
-
- if (x->nmismatches_whole < y->nmismatches_whole) {
- return -1;
- } else if (x->nmismatches_whole > y->nmismatches_whole) {
- return +1;
- } else if (x->distance < y->distance) {
- return -1;
- } else if (x->distance > y->distance) {
- return +1;
- } else {
- return 0;
- }
-}
-#endif
-
-#if 0
-List_T
-Stage3end_sort_bymatchdist (List_T hitlist, int maxchimerapaths) {
-#ifdef DEBUG
- T hit;
-#endif
- List_T sorted = NULL, p;
- T *hits;
- int npaths, n, i;
-
- if ((n = List_length(hitlist)) == 0) {
- return NULL;
- } else {
-#ifdef USE_ALLOCA_FOR_HITS
- hits = (T *) MALLOCA(n * sizeof(T));
- List_free_array_and_free(hits,&hitlist);
-#else
- hits = (T *) List_to_array(hitlist);
- List_free(&hitlist);
-#endif
- qsort(hits,n,sizeof(T),chimera_match_distance_cmp);
- }
-
- if (n < maxchimerapaths) {
- npaths = n;
- } else {
- npaths = maxchimerapaths;
- }
- for (i = n-1; i >= npaths; i--) {
- Stage3end_free(&(hits[i]));
- }
- for (i = npaths-1; i >= 0; i--) {
- sorted = List_push(sorted,hits[i]);
- }
-#ifdef USE_ALLOCA_FOR_HITS
- FREEA(hits);
-#else
- FREE(hits);
-#endif
-
- debug(
- for (p = sorted, i = 0; p != NULL; p = p->rest, i++) {
- hit = (T) p->first;
- printf(" Final %d: chr %d -- %d\n",
- i,hit->substring1->chrnum,hit->substring2->chrnum);
- }
- );
-
- return sorted;
-}
-#endif
-
-
-
/* Used for eliminating exact duplicates. Also sorts secondarily by hittype. */
static int
hitpair_sort_cmp (const void *a, const void *b) {
@@ -13159,21 +13109,21 @@ hitpair_sort_cmp (const void *a, const void *b) {
Univcoord_T x_hit3_high, x_hit3_low, y_hit3_high, y_hit3_low;
Univcoord_T x_low, x_high, y_low, y_high;
- debug8(printf(" Comparing (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), indel_low %d and %d\n",
+ debug8(printf(" Comparing (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d\n",
Pairtype_string(x->pairtype),hittype_string(x->hit5->hittype),
hittype_string(x->hit3->hittype),x,
x->hit5->low - x->hit5->chroffset,x->hit5->high - x->hit5->chroffset,
x->hit3->low - x->hit3->chroffset,x->hit3->high - x->hit3->chroffset,
x->dir,x->hit5->alias,x->hit3->alias,x->nmatches,x->nmatches_posttrim,
- x->hit5->indel_low,x->hit3->indel_low));
+ start_amb_length(x->hit5) + end_amb_length(x->hit5),start_amb_length(x->hit3) + end_amb_length(x->hit3)));
- debug8(printf(" with (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), indel_low %d and %d\n",
+ debug8(printf(" with (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d\n",
Pairtype_string(y->pairtype),hittype_string(y->hit5->hittype),
hittype_string(y->hit3->hittype),y,
y->hit5->low - y->hit5->chroffset,y->hit5->high - y->hit5->chroffset,
y->hit3->low - y->hit3->chroffset,y->hit3->high - y->hit3->chroffset,
y->dir,y->hit5->alias,y->hit3->alias,y->nmatches,y->nmatches_posttrim,
- y->hit5->indel_low,y->hit3->indel_low));
+ start_amb_length(y->hit5) + end_amb_length(y->hit5),start_amb_length(y->hit3) + end_amb_length(y->hit3)));
x_hit5_low = normalize_coord(x->hit5->low,x->hit5->alias,x->hit5->chrlength);
@@ -13282,15 +13232,20 @@ hitpair_sort_cmp (const void *a, const void *b) {
return -1;
} else if (y->nmatches_posttrim > x->nmatches_posttrim) {
return +1;
- } else if (x->nchimera_novel < y->nchimera_novel) {
+#endif
+
+#if 0
+ } else if (x->nsplices < y->nsplices) {
return -1;
- } else if (y->nchimera_novel < x->nchimera_novel) {
+ } else if (y->nsplices < x->nsplices) {
return +1;
#endif
- } else if (x->nchimera_known > y->nchimera_known) {
+
+ } else if (x->amb_status_inside < y->amb_status_inside) {
return -1;
- } else if (y->nchimera_known > x->nchimera_known) {
+ } else if (y->amb_status_inside < x->amb_status_inside) {
return +1;
+
} else if (x->hit5->hittype < y->hit5->hittype) {
return -1;
} else if (y->hit5->hittype < x->hit5->hittype) {
@@ -13299,14 +13254,43 @@ hitpair_sort_cmp (const void *a, const void *b) {
return -1;
} else if (y->hit3->hittype < x->hit3->hittype) {
return +1;
+
+#if 0
+ } else if ((x->amb_resolve_5 != -1 && x->amb_resolve_3 != -1) &&
+ (y->amb_resolve_5 == -1 || y->amb_resolve_3 == -1)) {
+ /* x is resolved, y is ambiguous. x wins */
+ return -1;
+ } else if ((y->amb_resolve_5 != -1 && y->amb_resolve_3 != -1) &&
+ (x->amb_resolve_5 == -1 || x->amb_resolve_3 == -1)) {
+ /* y is resolved, x is ambiguous. y wins */
+ return +1;
+#endif
+
+#if 0
+ } else if (x->hit5->start_amb_length + x->hit5->end_amb_length +
+ x->hit3->start_amb_length + x->hit3->end_amb_length == 0 &&
+ y->hit5->start_amb_length + y->hit5->end_amb_length +
+ y->hit3->start_amb_length + y->hit3->end_amb_length > 0) {
+ /* x is resolved, y is ambiguous. x wins */
+ return -1;
+ } else if (y->hit5->start_amb_length + y->hit5->end_amb_length +
+ y->hit3->start_amb_length + y->hit3->end_amb_length == 0 &&
+ x->hit5->start_amb_length + x->hit5->end_amb_length +
+ x->hit3->start_amb_length + x->hit3->end_amb_length > 0) {
+ /* y is resolved, x is ambiguous. y wins */
+ return +1;
+#endif
+
} else if (x->sense_consistent_p == true && y->sense_consistent_p == false) {
return -1;
} else if (x->sense_consistent_p == false && y->sense_consistent_p == true) {
return +1;
+#if 0
} else if (x->indel_low < y->indel_low) {
return -1;
} else if (y->indel_low < x->indel_low) {
return +1;
+#endif
} else {
return 0;
}
@@ -13400,16 +13384,30 @@ hitpair_equiv_cmp (Stage3pair_T x, Stage3pair_T y) {
#endif
#if 0
- /* Causes GMAP and non-GMAP to not be recognized as equivalent */
- } else if (x->nchimera_novel < y->nchimera_novel) {
+ /* Causes hits to not be recognized as equivalent */
+ } else if (x->nsplices < y->nsplices) {
return -1;
- } else if (y->nchimera_novel < x->nchimera_novel) {
+ } else if (y->nsplices < x->nsplices) {
return +1;
#endif
- } else if (x->nchimera_known > y->nchimera_known) {
+
+ } else if (x->amb_status_inside < y->amb_status_inside) {
+ return -1;
+ } else if (y->amb_status_inside < x->amb_status_inside) {
+ return +1;
+
+#if 0
+ } else if (x->hit5->start_amb_length + x->hit5->end_amb_length +
+ x->hit3->start_amb_length + x->hit3->end_amb_length > 0 &&
+ y->hit5->start_amb_length + y->hit5->end_amb_length +
+ y->hit3->start_amb_length + y->hit3->end_amb_length == 0) {
return -1;
- } else if (y->nchimera_known > x->nchimera_known) {
+ } else if (y->hit5->start_amb_length + y->hit5->end_amb_length +
+ y->hit3->start_amb_length + y->hit3->end_amb_length > 0 &&
+ x->hit5->start_amb_length + x->hit5->end_amb_length +
+ x->hit3->start_amb_length + x->hit3->end_amb_length == 0) {
return +1;
+#endif
} else if (x->sense_consistent_p == true && y->sense_consistent_p == false) {
return -1;
@@ -13771,9 +13769,9 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
max_trim_left = (hit5->trim_left > besthit5->trim_left) ? hit5->trim_left : besthit5->trim_left;
max_trim_right = (hit5->trim_right > besthit5->trim_right) ? hit5->trim_right : besthit5->trim_right;
hitpair_nmatches = Pair_array_nmatches_posttrim(hit5->pairarray,hit5->npairs,
- /*pos5*/max_trim_left,/*pos3*/hit5->querylength_adj - max_trim_right);
+ /*pos5*/max_trim_left,/*pos3*/hit5->querylength - max_trim_right);
best_hitpair_nmatches = Pair_array_nmatches_posttrim(besthit5->pairarray,besthit5->npairs,
- /*pos5*/max_trim_left,/*pos3*/besthit5->querylength_adj - max_trim_right);
+ /*pos5*/max_trim_left,/*pos3*/besthit5->querylength - max_trim_right);
debug8(printf(" gmap/gmap on 5' end with trim %d left, %d right: %d versus %d",
max_trim_left,max_trim_right,hitpair_nmatches,best_hitpair_nmatches));
@@ -13782,9 +13780,9 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
max_trim_left = (hit3->trim_left > besthit3->trim_left) ? hit3->trim_left : besthit3->trim_left;
max_trim_right = (hit3->trim_right > besthit3->trim_right) ? hit3->trim_right : besthit3->trim_right;
hitpair_nmatches += Pair_array_nmatches_posttrim(hit3->pairarray,hit3->npairs,
- /*pos5*/max_trim_left,/*pos3*/hit3->querylength_adj - max_trim_right);
+ /*pos5*/max_trim_left,/*pos3*/hit3->querylength - max_trim_right);
best_hitpair_nmatches += Pair_array_nmatches_posttrim(besthit3->pairarray,besthit3->npairs,
- /*pos5*/max_trim_left,/*pos3*/besthit3->querylength_adj - max_trim_right);
+ /*pos5*/max_trim_left,/*pos3*/besthit3->querylength - max_trim_right);
debug8(printf(" gmap/gmap on 3' end with trim %d left, %d right: %d versus %d",
max_trim_left,max_trim_right,hitpair_nmatches,best_hitpair_nmatches));
@@ -13795,9 +13793,9 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
max_trim_left = (hit5->trim_left > besthit5->trim_left) ? hit5->trim_left : besthit5->trim_left;
max_trim_right = (hit5->trim_right > besthit5->trim_right) ? hit5->trim_right : besthit5->trim_right;
hitpair_nmatches = Pair_array_nmatches_posttrim(hit5->pairarray,hit5->npairs,
- /*pos5*/max_trim_left,/*pos3*/hit5->querylength_adj - max_trim_right);
+ /*pos5*/max_trim_left,/*pos3*/hit5->querylength - max_trim_right);
best_hitpair_nmatches = Pair_array_nmatches_posttrim(besthit5->pairarray,besthit5->npairs,
- /*pos5*/max_trim_left,/*pos3*/besthit5->querylength_adj - max_trim_right);
+ /*pos5*/max_trim_left,/*pos3*/besthit5->querylength - max_trim_right);
debug8(printf(" gmap/gmap on 5' end with trim %d left, %d right: %d versus %d",
max_trim_left,max_trim_right,hitpair_nmatches,best_hitpair_nmatches));
@@ -13811,9 +13809,9 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
max_trim_left = (hit3->trim_left > besthit3->trim_left) ? hit3->trim_left : besthit3->trim_left;
max_trim_right = (hit3->trim_right > besthit3->trim_right) ? hit3->trim_right : besthit3->trim_right;
hitpair_nmatches = Pair_array_nmatches_posttrim(hit3->pairarray,hit3->npairs,
- /*pos5*/max_trim_left,/*pos3*/hit3->querylength_adj - max_trim_right);
+ /*pos5*/max_trim_left,/*pos3*/hit3->querylength - max_trim_right);
best_hitpair_nmatches = Pair_array_nmatches_posttrim(besthit3->pairarray,besthit3->npairs,
- /*pos5*/max_trim_left,/*pos3*/besthit3->querylength_adj - max_trim_right);
+ /*pos5*/max_trim_left,/*pos3*/besthit3->querylength - max_trim_right);
debug8(printf(" gmap/gmap on 3' end with trim %d left, %d right: %d versus %d",
max_trim_left,max_trim_right,hitpair_nmatches,best_hitpair_nmatches));
@@ -13857,40 +13855,26 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
#endif
#if 0
- } else if (hitpair->nchimera_novel > best_hitpair->nchimera_novel) {
+ } else if (hitpair->nsplices > best_hitpair->nsplices) {
/* k is worse */
- debug8(printf(" => loses by nchimera_novel\n"));
+ debug8(printf(" => loses by nsplices: %d > %d in best\n",hitpair->nsplices,best_hitpair->nsplices));
return -1;
- } else if (hitpair->nchimera_novel < best_hitpair->nchimera_novel) {
+ } else if (hitpair->nsplices < best_hitpair->nsplices) {
/* k is better */
- debug8(printf(" => wins by nchimera_novel\n"));
+ debug8(printf(" => wins by nsplices: %d < %d in best\n",hitpair->nsplices,best_hitpair->nsplices));
return +1;
#endif
- /* Favoring nchimera_known helps before outerlength favors known
- splices over novel ones */
- } else if (hitpair->nchimera_known < best_hitpair->nchimera_known) {
+ } else if (hitpair->amb_status_inside > best_hitpair->amb_status_inside) {
/* k is worse */
- debug8(printf(" => loses by nchimera_known: %d < %d\n",
- hitpair->nchimera_known < best_hitpair->nchimera_known));
+ debug8(printf(" => loses by amb_status_inside\n"));
return -1;
- } else if (hitpair->nchimera_known > best_hitpair->nchimera_known) {
+ } else if (hitpair->amb_status_inside < best_hitpair->amb_status_inside) {
/* k is better */
- debug8(printf(" => wins by nchimera_known\n"));
+ debug8(printf(" => wins by amb_status_inside\n"));
return +1;
-#if 0
- } else if (hitpair->absdifflength < best_hitpair->absdifflength) {
- /* k is worse */
- debug8(printf(" => loses by absdifflength\n"));
- return -1;
- } else if (hitpair->absdifflength > best_hitpair->absdifflength) {
- /* k is better */
- debug8(printf(" => wins by absdifflength\n"));
- return +1;
-#endif
-#if 0
} else if (hitpair->hit5->hittype > best_hitpair->hit5->hittype &&
hitpair->hit3->hittype >= best_hitpair->hit3->hittype) {
/* k is worse */
@@ -13914,6 +13898,48 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
/* k is better */
debug8(printf(" => wins by hittype\n"));
return +1;
+
+#if 0
+ } else if ((hitpair->amb_resolve_5 == -1 || hitpair->amb_resolve_3 == -1) &&
+ (best_hitpair->amb_resolve_5 != -1 && best_hitpair->amb_resolve_3 != -1)) {
+ /* best_hitpair is resolved, hitpair is ambiguous. best_hitpair wins */
+ debug8(printf(" => loses by resolve_inside\n"));
+ return -1;
+
+ } else if ((hitpair->amb_resolve_5 != -1 && hitpair->amb_resolve_3 != -1) &&
+ (best_hitpair->amb_resolve_5 == -1 || best_hitpair->amb_resolve_3 == -1)) {
+ /* hitpair is resolved, best_hitpair is ambiguous. hitpair wins */
+ debug8(printf(" => wins by resolve_inside: %d, %d, %d, %d\n",
+ hitpair->amb_resolve_5,hitpair->amb_resolve_3,
+ best_hitpair->amb_resolve_5,best_hitpair->amb_resolve_3));
+ return +1;
+#endif
+
+#if 0
+ } else if (hitpair->hit5->start_amb_length + hitpair->hit5->end_amb_length +
+ hitpair->hit3->start_amb_length + hitpair->hit3->end_amb_length > 0 &&
+ best_hitpair->hit5->start_amb_length + best_hitpair->hit5->end_amb_length +
+ best_hitpair->hit3->start_amb_length + best_hitpair->hit3->end_amb_length == 0) {
+ debug8(printf(" => loses by ambiguity\n"));
+ return -1;
+
+ } else if (hitpair->hit5->start_amb_length + hitpair->hit5->end_amb_length +
+ hitpair->hit3->start_amb_length + hitpair->hit3->end_amb_length == 0 &&
+ best_hitpair->hit5->start_amb_length + best_hitpair->hit5->end_amb_length +
+ best_hitpair->hit3->start_amb_length + best_hitpair->hit3->end_amb_length > 0) {
+ debug8(printf(" => wins by ambiguity\n"));
+ return +1;
+#endif
+
+#if 0
+ } else if (hitpair->absdifflength < best_hitpair->absdifflength) {
+ /* k is worse */
+ debug8(printf(" => loses by absdifflength\n"));
+ return -1;
+ } else if (hitpair->absdifflength > best_hitpair->absdifflength) {
+ /* k is better */
+ debug8(printf(" => wins by absdifflength\n"));
+ return +1;
#endif
} else if (finalp == false) {
@@ -13950,59 +13976,26 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
/* Next we look at splice probability */
} else {
- if (hitpair->hit5->hittype == SPLICE && best_hitpair->hit5->hittype == SPLICE &&
- hitpair->hit3->hittype == SPLICE && best_hitpair->hit3->hittype == SPLICE) {
- debug8(printf(" => dual splice"));
- prob1 = Substring_chimera_prob(hitpair->hit5->substring_donor) + Substring_chimera_prob(hitpair->hit5->substring_acceptor) +
- Substring_chimera_prob(hitpair->hit3->substring_donor) + Substring_chimera_prob(hitpair->hit3->substring_acceptor);
- prob2 = Substring_chimera_prob(best_hitpair->hit5->substring_donor) + Substring_chimera_prob(best_hitpair->hit5->substring_acceptor) +
- Substring_chimera_prob(best_hitpair->hit3->substring_donor) + Substring_chimera_prob(best_hitpair->hit3->substring_acceptor);
- if (prob1 + 0.3 < prob2) {
- /* k is worse */
- debug8(printf(" => loses by dual splice prob %f vs %f\n",prob1,prob2));
- return -1;
- } else if (prob1 > prob2 + 0.3) {
- /* k is better */
- debug8(printf(" => wins by dual splice prob %f vs %f\n",prob1,prob2));
- return +1;
- }
-
- } else if (hitpair->hit5->hittype == SPLICE && best_hitpair->hit5->hittype == SPLICE) {
- debug8(printf(" => splice on hit5"));
- prob1 = Substring_chimera_prob(hitpair->hit5->substring_donor) + Substring_chimera_prob(hitpair->hit5->substring_acceptor);
- prob2 = Substring_chimera_prob(best_hitpair->hit5->substring_donor) + Substring_chimera_prob(best_hitpair->hit5->substring_acceptor);
- if (prob1 + 0.3 < prob2) {
- /* k is worse */
- debug8(printf(" => loses by splice prob %f vs %f\n",prob1,prob2));
- return -1;
- } else if (prob1 > prob2 + 0.3) {
- /* k is better */
- debug8(printf(" => wins by splice prob %f vs %f\n",prob1,prob2));
- return +1;
- }
-
- } else if (hitpair->hit3->hittype == SPLICE && best_hitpair->hit3->hittype == SPLICE) {
- debug8(printf(" => splice on hit3"));
- prob1 = Substring_chimera_prob(hitpair->hit3->substring_donor) + Substring_chimera_prob(hitpair->hit3->substring_acceptor);
- prob2 = Substring_chimera_prob(best_hitpair->hit3->substring_donor) + Substring_chimera_prob(best_hitpair->hit3->substring_acceptor);
- if (prob1 + 0.3 < prob2) {
- /* k is worse */
- debug8(printf(" => loses by splice prob %f vs %f\n",prob1,prob2));
- return -1;
- } else if (prob1 > prob2 + 0.3) {
- /* k is better */
- debug8(printf(" => wins by splice prob %f vs %f\n",prob1,prob2));
- return +1;
- }
+ debug8(printf(" => prob"));
+ prob1 = Stage3end_prob(hitpair->hit5) + Stage3end_prob(hitpair->hit3);
+ prob2 = Stage3end_prob(best_hitpair->hit5) + Stage3end_prob(best_hitpair->hit3);
+ if (prob1 + 0.3 < prob2) {
+ /* k is worse */
+ debug8(printf(" => loses by dual splice prob %f vs %f\n",prob1,prob2));
+ return -1;
+ } else if (prob1 > prob2 + 0.3) {
+ /* k is better */
+ debug8(printf(" => wins by dual splice prob %f vs %f\n",prob1,prob2));
+ return +1;
}
/* Overlapping ends worse than separate ends */
- if (hitpair->insertlength <= hitpair->hit5->querylength_adj + hitpair->hit3->querylength_adj &&
- best_hitpair->insertlength > best_hitpair->hit5->querylength_adj + best_hitpair->hit3->querylength_adj) {
+ if (hitpair->insertlength <= hitpair->hit5->querylength + hitpair->hit3->querylength &&
+ best_hitpair->insertlength > best_hitpair->hit5->querylength + best_hitpair->hit3->querylength) {
debug8(printf(" => loses by being overlapping\n"));
return -1;
- } else if (hitpair->insertlength > hitpair->hit5->querylength_adj + hitpair->hit3->querylength_adj &&
- best_hitpair->insertlength <= best_hitpair->hit5->querylength_adj + best_hitpair->hit3->querylength_adj) {
+ } else if (hitpair->insertlength > hitpair->hit5->querylength + hitpair->hit3->querylength &&
+ best_hitpair->insertlength <= best_hitpair->hit5->querylength + best_hitpair->hit3->querylength) {
debug8(printf(" => wins by being separate\n"));
return +1;
@@ -14094,21 +14087,23 @@ pair_remove_bad_superstretches (bool *keep_p, Stage3pair_T superstretch, List_T
q = List_next(p);
while (q != NULL && hitpair_subsumption(stage3pair,(Stage3pair_T) List_head(q)) == true) {
#ifdef DEBUG8
- printf(" This (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), nmatches: %d (%d posttrim), indel_low %d and %d\n",
+ printf(" This (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), nmatches: %d (%d posttrim), insertlength %d, amb_status_inside %d, amb_lengths %d and %d\n",
Pairtype_string(stage3pair->pairtype),hittype_string(stage3pair->hit5->hittype),
hittype_string(stage3pair->hit3->hittype),stage3pair,
stage3pair->hit5->low - stage3pair->hit5->chroffset,stage3pair->hit5->high - stage3pair->hit5->chroffset,
stage3pair->hit3->low - stage3pair->hit3->chroffset,stage3pair->hit3->high - stage3pair->hit3->chroffset,
stage3pair->dir,stage3pair->nmatches,stage3pair->nmatches_posttrim,
- stage3pair->hit5->indel_low,stage3pair->hit3->indel_low);
+ stage3pair->insertlength,stage3pair->amb_status_inside,
+ start_amb_length(stage3pair->hit5)+ end_amb_length(stage3pair->hit5),start_amb_length(stage3pair->hit3) + end_amb_length(stage3pair->hit3)));
hitpair = (Stage3pair_T) List_head(q);
- printf("subsumes that (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), nmatches: %d (%d posttrim), indel_low %d and %d\n",
+ printf("subsumes that (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), nmatches: %d (%d posttrim), insertlength %d, amb_status_inside %d, amb_lengths %d and %d\n",
Pairtype_string(hitpair->pairtype),hittype_string(hitpair->hit5->hittype),
hittype_string(hitpair->hit3->hittype),hitpair,
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hitpair->dir,hitpair->nmatches,hitpair->nmatches_posttrim,
- hitpair->hit5->indel_low,hitpair->hit3->indel_low);
+ hitpair->insertlength,hitpair->amb_status_inside,
+ start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5),start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3)));
#endif
q = List_next(q);
}
@@ -14231,13 +14226,13 @@ pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
debug8(
for (i = 0; i < n; i++) {
hitpair = hitpairs[i];
- printf(" Initial %d (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), indel_low %d and %d\n",
+ printf(" Initial %d (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), alias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d\n",
i,Pairtype_string(hitpair->pairtype),hittype_string(hitpair->hit5->hittype),
hittype_string(hitpair->hit3->hittype),hitpair,
hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
hitpair->dir,hitpair->hit5->alias,hitpair->hit3->alias,hitpair->nmatches,hitpair->nmatches_posttrim,
- hitpair->hit5->indel_low,hitpair->hit3->indel_low);
+ start_amb_length(hitpair->hit5) + end_amb_length(hitpair->hit5),start_amb_length(hitpair->hit3) + end_amb_length(hitpair->hit3));
}
);
@@ -14316,7 +14311,7 @@ Stage3pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
for (p = hitpairlist; p != NULL; p = List_next(p)) {
hitpair = (Stage3pair_T) List_head(p);
- if (hitpair->insertlength <= hitpair->hit5->querylength_adj + hitpair->hit3->querylength_adj) {
+ if (hitpair->insertlength <= hitpair->hit5->querylength + hitpair->hit3->querylength) {
overlapping = List_push(overlapping,(void *) hitpair);
} else {
separate = List_push(separate,(void *) hitpair);
@@ -14517,10 +14512,11 @@ Stage3pair_T *
Stage3pair_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
Stage3pair_T *stage3pairarray, int maxpaths,
Shortread_T queryseq5, Shortread_T queryseq3,
+ char *queryuc_ptr_5, char *queryrc5,
+ char *queryuc_ptr_3, char *queryrc3,
Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
- Genome_T genome, char *quality_string_5, char *quality_string_3) {
- char *query5, *query3;
+ char *quality_string_5, char *quality_string_3) {
float maxlik, loglik;
float total, q;
@@ -14544,15 +14540,17 @@ Stage3pair_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
}
stage3pairarray[0]->absmq_score = MAPQ_MAXIMUM_SCORE;
- query5 = Shortread_fullpointer_uc(queryseq5);
- query3 = Shortread_fullpointer_uc(queryseq3);
-
assert(stage3pairarray[0]->private5p == true);
assert(stage3pairarray[0]->private3p == true);
- Stage3end_display_prep(stage3pairarray[0]->hit5,query5,query5_compress_fwd,query5_compress_rev,
- genome);
- Stage3end_display_prep(stage3pairarray[0]->hit3,query3,query3_compress_fwd,query3_compress_rev,
- genome);
+ Stage3end_display_prep(stage3pairarray[0]->hit5,queryuc_ptr_5,queryrc5,query5_compress_fwd,query5_compress_rev,
+ stage3pairarray[0]->amb_resolve_5,/*first_read_p*/true);
+ Stage3end_display_prep(stage3pairarray[0]->hit3,queryuc_ptr_3,queryrc3,query3_compress_fwd,query3_compress_rev,
+ stage3pairarray[0]->amb_resolve_3,/*first_read_p*/false);
+ if (stage3pairarray[0]->amb_resolve_5 >= 0 || stage3pairarray[0]->amb_resolve_3 >= 0) {
+ stage3pairarray[0]->insertlength = compute_insertlength(stage3pairarray[0]);
+ assert((int) stage3pairarray[0]->insertlength > 0);
+ }
+
*first_absmq = stage3pairarray[0]->absmq_score;
*second_absmq = 0;
@@ -14569,14 +14567,28 @@ Stage3pair_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
}
}
+ /* Resolve ambiguities, needed for computing mapq */
+ for (i = 0; i < *npaths; i++) {
+ assert(stage3pairarray[i]->private5p == true);
+ assert(stage3pairarray[i]->private3p == true);
+ Stage3end_display_prep(stage3pairarray[i]->hit5,queryuc_ptr_5,queryrc5,query5_compress_fwd,query5_compress_rev,
+ stage3pairarray[i]->amb_resolve_5,/*first_read_p*/true);
+ Stage3end_display_prep(stage3pairarray[i]->hit3,queryuc_ptr_3,queryrc3,query3_compress_fwd,query3_compress_rev,
+ stage3pairarray[i]->amb_resolve_3,/*first_read_p*/false);
+ if (stage3pairarray[i]->amb_resolve_5 >= 0 || stage3pairarray[i]->amb_resolve_3 >= 0) {
+ stage3pairarray[i]->insertlength = compute_insertlength(stage3pairarray[i]);
+ }
+ }
+
+
/* Compute mapq_loglik */
for (i = 0; i < *npaths; i++) {
stage3pairarray[i]->mapq_loglik =
Stage3end_compute_mapq(stage3pairarray[i]->hit5,query5_compress_fwd,query5_compress_rev,
quality_string_5,/*trim_terminals_p*/non_terminal_5p ? false : true);
stage3pairarray[i]->mapq_loglik +=
- Stage3end_compute_mapq(stage3pairarray[i]->hit3,query3_compress_fwd,query3_compress_rev,
- quality_string_3,/*trim_terminals_p*/non_terminal_3p ? false : true);
+ Stage3end_compute_mapq(stage3pairarray[i]->hit3,query3_compress_fwd,query3_compress_rev,
+ quality_string_3,/*trim_terminals_p*/non_terminal_3p ? false : true);
}
/* Sort by nmatches, then mapq, and then insert length */
@@ -14639,18 +14651,6 @@ Stage3pair_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
total += (stage3pairarray[i]->mapq_loglik = fasterexp(stage3pairarray[i]->mapq_loglik));
}
- /* Prepare for display */
- query5 = Shortread_fullpointer_uc(queryseq5);
- query3 = Shortread_fullpointer_uc(queryseq3);
- for (i = 0; i < compute_npaths; i++) {
- assert(stage3pairarray[i]->private5p == true);
- assert(stage3pairarray[i]->private3p == true);
- Stage3end_display_prep(stage3pairarray[i]->hit5,query5,query5_compress_fwd,query5_compress_rev,
- genome);
- Stage3end_display_prep(stage3pairarray[i]->hit3,query3,query3_compress_fwd,query3_compress_rev,
- genome);
- }
-
/* Obtain posterior probabilities of being true */
for (i = 0; i < compute_npaths; i++) {
stage3pairarray[i]->mapq_loglik /= total;
@@ -14733,15 +14733,15 @@ Stage3pair_remove_excess_terminals (List_T hitpairlist) {
-/* terminal alignments need to win on nmatches */
static List_T
Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_level, int suboptimal_mismatches,
Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
int querylength5, int querylength3, bool keep_gmap_p, bool finalp) {
- List_T optimal = NULL, p;
+ List_T optimal = NULL, p, q;
Stage3pair_T hitpair;
T hit5, hit3;
+ Substring_T substring;
int cutoff_level_5, cutoff_level_3, score;
int n;
int minscore5 = querylength5, minscore3 = querylength3, minscore = querylength5 + querylength3;
@@ -14749,13 +14749,9 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
#ifdef USE_OPTIMAL_SCORE_BINGO
int minscore_bingo = querylength5 + querylength3;
#endif
- int trim_left_5, trim_right_5, trim_left_3, trim_right_3;
- int min_trim_left_5 = querylength5, min_trim_right_5 = querylength5,
- min_trim_left_3 = querylength3, min_trim_right_3 = querylength3;
- int max_trim_left_terminal_5 = 0, max_trim_right_terminal_5 = 0,
- max_trim_left_terminal_3 = 0, max_trim_right_terminal_3 = 0;
+ int trim_left_5 = querylength5, trim_right_5 = querylength5,
+ trim_left_3 = querylength3, trim_right_3 = querylength3;
int nindelbreaks;
- bool non_double_terminal_p = false, non_terminal_5p = false, non_terminal_3p = false;
#ifdef TRANSLOC_SPECIAL
bool non_translocation_p = false;
@@ -14772,39 +14768,6 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
return hitpairlist;
}
- p = hitpairlist;
- while (non_double_terminal_p == false && p != NULL) {
- hitpair = (Stage3pair_T) p->first;
- if (hitpair->hit5->hittype != TERMINAL) {
- non_double_terminal_p = true;
- } else if (hitpair->hit3->hittype != TERMINAL) {
- non_double_terminal_p = true;
- }
- p = p->rest;
- }
- debug6(printf("non_double_terminal_p: %d\n",non_double_terminal_p));
-
- p = hitpairlist;
- while (non_terminal_5p == false && p != NULL) {
- hitpair = (Stage3pair_T) p->first;
- if (hitpair->hit5->hittype != TERMINAL) {
- non_terminal_5p = true;
- }
- p = p->rest;
- }
- debug6(printf("non_terminal_5p: %d\n",non_terminal_5p));
-
- p = hitpairlist;
- while (non_terminal_3p == false && p != NULL) {
- hitpair = (Stage3pair_T) p->first;
- if (hitpair->hit3->hittype != TERMINAL) {
- non_terminal_3p = true;
- }
- p = p->rest;
- }
- debug6(printf("non_terminal_3p: %d\n",non_terminal_3p));
-
-
/* Use eventrim for comparing alignments */
for (p = hitpairlist; p != NULL; p = p->rest) {
@@ -14816,109 +14779,72 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,hittype_string(hit5->hittype),
hit5->trim_left,hit5->trim_left_splicep ? " (splice)" : "",
hit5->trim_right,hit5->trim_right_splicep ? " (splice)" : "",
- hit5->start_amb_length,hit5->end_amb_length,
+ start_amb_length(hit5),end_amb_length(hit5),
hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,hittype_string(hit3->hittype),
hit3->trim_left,hit3->trim_left_splicep ? " (splice)" : "",
hit3->trim_right,hit3->trim_right_splicep ? " (splice)" : "",
- hit3->start_amb_length,hit3->end_amb_length));
+ start_amb_length(hit3),end_amb_length(hit3)));
+
if (hit5->hittype == TERMINAL) {
- /* Don't allow terminals to set trims, because they don't attempt to extend to ends */
-#if 0
- if (hit5->trim_left > max_trim_left_terminal_5) {
- max_trim_left_terminal_5 = hit5->trim_left;
- }
- if (hit5->trim_right > max_trim_right_terminal_5) {
- max_trim_right_terminal_5 = hit5->trim_right;
- }
-#endif
+ /* Don't allow terminals to set trims */
+#if 0
} else if ((hit5->hittype == INSERTION || hit5->hittype == DELETION) &&
- (hit5->indel_pos < 15 || hit5->indel_pos > hit5->querylength_adj - 15)) {
+ (hit5->indel_pos < 15 || hit5->indel_pos > hit5->querylength - 15)) {
/* Don't allow end indels to set trims */
+#endif
} else {
if (hit5->trim_left_splicep == true) {
- if (hit5->trim_left > max_trim_left_terminal_5) {
- max_trim_left_terminal_5 = hit5->trim_left;
- }
- } else if (hit5->trim_left < min_trim_left_5) {
- min_trim_left_5 = hit5->trim_left;
+ /* Skip */
+ } else if (hit5->trim_left < trim_left_5) {
+ trim_left_5 = hit5->trim_left;
}
if (hit5->trim_right_splicep == true) {
- if (hit5->trim_right > max_trim_right_terminal_5) {
- max_trim_right_terminal_5 = hit5->trim_right;
- }
- } else if (hit5->trim_right < min_trim_right_5) {
- min_trim_right_5 = hit5->trim_right;
+ /* Skip */
+ } else if (hit5->trim_right < trim_right_5) {
+ trim_right_5 = hit5->trim_right;
}
}
if (hit3->hittype == TERMINAL) {
- /* Don't allow terminals to set trims, because they don't attempt to extend to ends */
-#if 0
- if (hit3->trim_left > max_trim_left_terminal_3) {
- max_trim_left_terminal_3 = hit3->trim_left;
- }
- if (hit3->trim_right > max_trim_right_terminal_3) {
- max_trim_right_terminal_3 = hit3->trim_right;
- }
-#endif
+ /* Don't allow terminals to set trims */
+#if 0
} else if ((hit3->hittype == INSERTION || hit3->hittype == DELETION) &&
- (hit3->indel_pos < 15 || hit3->indel_pos > hit3->querylength_adj - 15)) {
+ (hit3->indel_pos < 15 || hit3->indel_pos > hit3->querylength - 15)) {
/* Don't allow end indels to set trims */
+#endif
} else {
if (hit3->trim_left_splicep == true) {
- if (hit3->trim_left > max_trim_left_terminal_3) {
- max_trim_left_terminal_3 = hit3->trim_left;
- }
- } else if (hit3->trim_left < min_trim_left_3) {
- min_trim_left_3 = hit3->trim_left;
+ /* Skip */
+ } else if (hit3->trim_left < trim_left_3) {
+ trim_left_3 = hit3->trim_left;
}
if (hit3->trim_right_splicep == true) {
- if (hit3->trim_right > max_trim_right_terminal_3) {
- max_trim_right_terminal_3 = hit3->trim_right;
- }
- } else if (hit3->trim_right < min_trim_right_3) {
- min_trim_right_3 = hit3->trim_right;
+ /* Skip */
+ } else if (hit3->trim_right < trim_right_3) {
+ trim_right_3 = hit3->trim_right;
}
}
}
- if (min_trim_left_5 == querylength5) {
- trim_left_5 = max_trim_left_terminal_5;
- } else {
- trim_left_5 = (max_trim_left_terminal_5 > min_trim_left_5) ? max_trim_left_terminal_5 : min_trim_left_5;
+ if (trim_left_5 == querylength5) {
+ trim_left_5 = 0;
}
- if (min_trim_right_5 == querylength5) {
- trim_right_5 = max_trim_right_terminal_5;
- } else {
- trim_right_5 = (max_trim_right_terminal_5 > min_trim_right_5) ? max_trim_right_terminal_5 : min_trim_right_5;
+ if (trim_right_5 == querylength5) {
+ trim_right_5 = 0;
}
-
- if (min_trim_left_3 == querylength3) {
- trim_left_3 = max_trim_left_terminal_3;
- } else {
- trim_left_3 = (max_trim_left_terminal_3 > min_trim_left_3) ? max_trim_left_terminal_3 : min_trim_left_3;
+ if (trim_left_3 == querylength3) {
+ trim_left_3 = 0;
}
- if (min_trim_right_3 == querylength3) {
- trim_right_3 = max_trim_right_terminal_3;
- } else {
- trim_right_3 = (max_trim_right_terminal_3 > min_trim_right_3) ? max_trim_right_terminal_3 : min_trim_right_3;
+ if (trim_right_3 == querylength3) {
+ trim_right_3 = 0;
}
- debug6(printf("non-terminals: hit5 min_trim_left: %d, min_trim_right %d\n",
- min_trim_left_5,min_trim_right_5));
- debug6(printf("prefinal-terminals: hit5 max_trim_left: %d, max_trim_right %d\n",
- max_trim_left_terminal_5,max_trim_right_terminal_5));
- debug6(printf("overall: trim_left %d, trim_right %d\n",trim_left_5,trim_right_5));
-
- debug6(printf("non-terminals: hit3 min_trim_left: %d, min_trim_right %d\n",
- min_trim_left_3,min_trim_right_3));
- debug6(printf("prefinal-terminals: hit3 max_trim_left: %d, max_trim_right %d\n",
- max_trim_left_terminal_3,max_trim_right_terminal_3));
- debug6(printf("overall: trim_left %d, trim_right %d\n",trim_left_3,trim_right_3));
+ debug6(printf("overall 5': trim_left %d, trim_right %d\n",trim_left_5,trim_right_5));
+ debug6(printf("overall 3': trim_left %d, trim_right %d\n",trim_left_3,trim_right_3));
for (p = hitpairlist; p != NULL; p = p->rest) {
@@ -14926,10 +14852,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hit5 = hitpair->hit5;
hit3 = hitpair->hit3;
- if (hit5->hittype == TERMINAL && non_double_terminal_p == true && finalp == false) {
- /* Ignore */
- hit5->score_eventrim = 0;
- } else if (hit5->hittype == GMAP) {
+ if (hit5->hittype == GMAP) {
hit5->score_eventrim = 0; /* was hit5->penalties */
debug6(printf("score 5' GMAP:"));
#if 0
@@ -14955,44 +14878,38 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
#endif
hit5->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
- trim_left_5,trim_right_5,hit5->start_amb_length,hit5->end_amb_length,
- hit5->querylength_adj);
+ trim_left_5,trim_right_5,start_amb_length(hit5),end_amb_length(hit5),
+ hit5->querylength);
debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit5->pairarray,hit5->npairs,
- trim_left_5,trim_right_5,hit5->start_amb_length,hit5->end_amb_length,
- hit5->querylength_adj)));
+ trim_left_5,trim_right_5,start_amb_length(hit5),end_amb_length(hit5),
+ hit5->querylength)));
+ if (start_amb_length(hit5) > 0) {
+ debug6(printf(" add penalty for start amb %d.",amb_penalty));
+ hit5->score_eventrim += amb_penalty;
+ }
+ if (end_amb_length(hit5) > 0) {
+ debug6(printf(" add penalty for end amb %d.",amb_penalty));
+ hit5->score_eventrim += amb_penalty;
+ }
+
#ifdef SCORE_INDELS
hit5->score_eventrim += indel_penalty_middle * nindelbreaks;
debug6(printf(" add indelbreaks %d.",indel_penalty_middle * nindelbreaks));
#endif
- if (hit5->start_amb_prob < 0.9) {
- hit5->score_eventrim += hit5->start_amb_length / ambig_end_interval;
- debug6(printf(" add amb start %d/%d (prob %f).",hit5->start_amb_length,ambig_end_interval,hit5->start_amb_prob));
- }
- if (hit5->end_amb_prob < 0.9) {
- hit5->score_eventrim += hit5->end_amb_length / ambig_end_interval;
- debug6(printf(" add amb end %d/%d (prob %f).",hit5->end_amb_length,ambig_end_interval,hit5->end_amb_prob));
- }
debug6(printf(" RESULT: %d\n",hit5->score_eventrim));
} else {
+ hit5->score_eventrim = 0; /* was hit5->penalties */
debug6(printf("score 5' OTHER:"));
- hit5->score_eventrim = hit5->penalties;
- debug6(printf(" penalties %d.",hit5->penalties));
-
- hit5->score_eventrim += Substring_count_mismatches_region(hit5->substring0,trim_left_5,trim_right_5,
- query5_compress_fwd,query5_compress_rev);
- debug6(printf(" substring 0 %d.",Substring_count_mismatches_region(hit5->substring0,trim_left_5,trim_right_5,
- query5_compress_fwd,query5_compress_rev)));
-
- hit5->score_eventrim += Substring_count_mismatches_region(hit5->substring1,trim_left_5,trim_right_5,
- query5_compress_fwd,query5_compress_rev);
- debug6(printf(" substring 1 %d.",Substring_count_mismatches_region(hit5->substring1,trim_left_5,trim_right_5,
- query5_compress_fwd,query5_compress_rev)));
- hit5->score_eventrim += Substring_count_mismatches_region(hit5->substring2,trim_left_5,trim_right_5,
- query5_compress_fwd,query5_compress_rev);
- debug6(printf(" substring 2 %d.",Substring_count_mismatches_region(hit5->substring2,trim_left_5,trim_right_5,
- query5_compress_fwd,query5_compress_rev)));
+ for (q = hit5->substrings_1toN; q != NULL; q = List_next(q)) {
+ substring = (Substring_T) List_head(q);
+ hit5->score_eventrim += Substring_count_mismatches_region(substring,trim_left_5,trim_right_5,
+ query5_compress_fwd,query5_compress_rev);
+ debug6(printf(" substring (%d..%d) %d.",trim_left_5,trim_right_5,
+ Substring_count_mismatches_region(substring,trim_left_5,trim_right_5,
+ query5_compress_fwd,query5_compress_rev)));
+ }
#ifdef SCORE_INDELS
/* Needs to match GMAP scoring */
@@ -15007,10 +14924,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
debug6(printf(" RESULT: %d\n",hit5->score_eventrim));
}
- if (hit3->hittype == TERMINAL && non_double_terminal_p == true && finalp == false) {
- /* Ignore */
- hit3->score_eventrim = 0;
- } else if (hit3->hittype == GMAP) {
+ if (hit3->hittype == GMAP) {
hit3->score_eventrim = 0; /* was hit3->penalties */
debug6(printf("score 3' GMAP:"));
#if 0
@@ -15036,15 +14950,26 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
#endif
hit3->score_eventrim += Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
- trim_left_3,trim_right_3,hit3->start_amb_length,hit3->end_amb_length,
- hit3->querylength_adj);
+ trim_left_3,trim_right_3,start_amb_length(hit3),end_amb_length(hit3),
+ hit3->querylength);
debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,hit3->pairarray,hit3->npairs,
- trim_left_3,trim_right_3,hit3->start_amb_length,hit3->end_amb_length,
- hit3->querylength_adj)));
+ trim_left_3,trim_right_3,start_amb_length(hit3),end_amb_length(hit3),
+ hit3->querylength)));
+
+ if (start_amb_length(hit3) > 0) {
+ debug6(printf(" add penalty for start amb %d.",amb_penalty));
+ hit3->score_eventrim += amb_penalty;
+ }
+ if (end_amb_length(hit3) > 0) {
+ debug6(printf(" add penalty for end amb %d.",amb_penalty));
+ hit3->score_eventrim += amb_penalty;
+ }
+
#ifdef SCORE_INDELS
hit3->score_eventrim += indel_penalty_middle * nindelbreaks;
debug6(printf(" add indelbreaks %d.",indel_penalty_middle * nindelbreaks));
#endif
+#if 0
if (hit3->start_amb_prob < 0.9) {
hit3->score_eventrim += hit3->start_amb_length / ambig_end_interval;
debug6(printf(" add amb start %d/%d (prob %f).",hit3->start_amb_length,ambig_end_interval,hit3->start_amb_prob));
@@ -15053,26 +14978,21 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hit3->score_eventrim += hit3->end_amb_length / ambig_end_interval;
debug6(printf(" add amb end %d/%d (prob %f).",hit3->end_amb_length,ambig_end_interval,hit3->end_amb_prob));
}
+#endif
debug6(printf(" RESULT: %d\n",hit3->score_eventrim));
+
} else {
+ hit3->score_eventrim = 0; /* was hit3->penalties */
debug6(printf("score 3' OTHER:"));
- hit3->score_eventrim = hit3->penalties;
- debug6(printf(" penalties %d.",hit3->penalties));
-
- hit3->score_eventrim += Substring_count_mismatches_region(hit3->substring0,trim_left_3,trim_right_3,
- query3_compress_fwd,query3_compress_rev);
- debug6(printf(" substring 0 %d.",Substring_count_mismatches_region(hit3->substring0,trim_left_3,trim_right_3,
- query3_compress_fwd,query3_compress_rev)));
- hit3->score_eventrim += Substring_count_mismatches_region(hit3->substring1,trim_left_3,trim_right_3,
- query3_compress_fwd,query3_compress_rev);
- debug6(printf(" substring 1 %d.",Substring_count_mismatches_region(hit3->substring1,trim_left_3,trim_right_3,
- query3_compress_fwd,query3_compress_rev)));
-
- hit3->score_eventrim += Substring_count_mismatches_region(hit3->substring2,trim_left_3,trim_right_3,
- query3_compress_fwd,query3_compress_rev);
- debug6(printf(" substring 2 %d.",Substring_count_mismatches_region(hit3->substring2,trim_left_3,trim_right_3,
- query3_compress_fwd,query3_compress_rev)));
+ for (q = hit3->substrings_1toN; q != NULL; q = List_next(q)) {
+ substring = (Substring_T) List_head(q);
+ hit3->score_eventrim += Substring_count_mismatches_region(substring,trim_left_3,trim_right_3,
+ query3_compress_fwd,query3_compress_rev);
+ debug6(printf(" substring (%d..%d) %d.",trim_left_3,trim_right_3,
+ Substring_count_mismatches_region(substring,trim_left_3,trim_right_3,
+ query3_compress_fwd,query3_compress_rev)));
+ }
#ifdef SCORE_INDELS
/* Needs to match GMAP scoring */
@@ -15103,21 +15023,17 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,
hitpair->insertlength,hitpair->outerlength));
- if (hitpair->hit5->hittype == TERMINAL && non_terminal_5p == true) {
- /* Don't use to determine minscore5 */
- } else if (hitpair->hit5->score_eventrim < minscore5) {
+ if (hitpair->hit5->score_eventrim < minscore5) {
minscore5 = hitpair->hit5->score_eventrim;
}
- if (hitpair->hit3->hittype == TERMINAL && non_terminal_3p == true) {
- /* Don't use to determine minscore3 */
- } else if (hitpair->hit3->score_eventrim < minscore3) {
+ if (hitpair->hit3->score_eventrim < minscore3) {
minscore3 = hitpair->hit3->score_eventrim;
}
}
debug6(printf("Stage3pair_optimal_score over %d pairs: minscore = %d and %d + subopt:%d\n",
n,minscore5,minscore3,suboptimal_mismatches));
- if (non_double_terminal_p == true && finalp == false) {
+ if (finalp == false) {
/* finalp == false. Add suboptimal_mismatches to each end. */
minscore5 += suboptimal_mismatches;
minscore3 += suboptimal_mismatches;
@@ -15127,13 +15043,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
for (p = hitpairlist; p != NULL; p = p->rest) {
hitpair = (Stage3pair_T) p->first;
- if (hitpair->hit5->hittype == TERMINAL || hitpair->hit3->hittype == TERMINAL) {
- debug6(printf("Prefinal: Keeping a hit pair of type %s-%s with score_eventrim %d and %d, because finalp is false\n",
- hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
- hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim));
- optimal = List_push(optimal,hitpair);
-
- } else if (keep_gmap_p == true && (hitpair->hit5->hittype == GMAP || hitpair->hit3->hittype == GMAP)) {
+ if (keep_gmap_p == true && (hitpair->hit5->hittype == GMAP || hitpair->hit3->hittype == GMAP)) {
/* GMAP hits already found to be better than their corresponding terminals */
debug6(printf("Prefinal: Keeping a hit pair of type %s-%s with score_eventrim %d and %d, because keep_gmap_p is true\n",
hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),
@@ -15156,8 +15066,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
}
} else {
- /* non_double_terminal_p == false (so need to prune results) or
- finalp == true. Add suboptimal_mismatches to overall score. */
+ /* finalp == true. Add suboptimal_mismatches to overall score. */
#if 0
if (minscore5 + minscore3 < minscore) {
cutoff_level = minscore + suboptimal_mismatches;
@@ -15181,29 +15090,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
hitpair->score_eventrim,hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,
hitpair->insertlength,hitpair->outerlength));
-#if 0
- if (hitpair->hit5->hittype != TERMINAL) {
- score5 = hitpair->hit5->score_eventrim;
- } else if (non_terminal_5p == true) {
- score5 = hitpair->hit5->score_eventrim;
- } else if (non_double_terminal_p == false) {
- score3 = hitpair->hit5->score_eventrim;
- } else {
- score5 = 0;
- }
- if (hitpair->hit3->hittype != TERMINAL) {
- score3 = hitpair->hit3->score_eventrim;
- } else if (non_terminal_3p == true) {
- score3 = hitpair->hit3->score_eventrim;
- } else if (non_double_terminal_p == false) {
- score3 = hitpair->hit3->score_eventrim;
- } else {
- score3 = 0;
- }
- score = score5 + score3;
-#else
score = hitpair->score_eventrim;
-#endif
if (keep_gmap_p == true && (hitpair->hit5->hittype == GMAP || hitpair->hit3->hittype == GMAP)) {
/* GMAP hits already found to be better than their corresponding terminals */
@@ -15265,7 +15152,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist, int cutoff_
Stage3pair_free(&hitpair);
} else {
debug6(printf("Keeping a hit pair with absdifflength %d and outerlength %d\n",
- hitpair->absdifflength,hitpair->outerlength));
+ hitpair->absdifflength,hitpair->outerlength));
optimal = List_push(optimal,hitpair);
}
}
@@ -15448,7 +15335,7 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
int querylength5, int querylength3, int maxpairedpaths,
int splicing_penalty, int genestrand) {
int new_found_score = *found_score;
- int pairscore, score5_start, score5_end, score5, score3, i, j;
+ int frontier_score, score5_start, score5_end, score5, score3, i, j;
List_T q, prev_start;
Stage3pair_T stage3pair;
T *hits5, *hits3, hit5, hit3;
@@ -15457,20 +15344,20 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
prev_start = hitpairs;
- pairscore = 0;
- while (*abort_pairing_p == false && pairscore <= *found_score + subopt_levels &&
- pairscore <= cutoff_level_5 + cutoff_level_3) {
- debug5a(printf("pairscore = %d\n",pairscore));
- if ((score5_start = pairscore - cutoff_level_3) < 0) {
+ frontier_score = 0;
+ while (*abort_pairing_p == false && frontier_score <= *found_score + subopt_levels &&
+ frontier_score <= cutoff_level_5 + cutoff_level_3) {
+ debug5a(printf("frontier_score = %d\n",frontier_score));
+ if ((score5_start = frontier_score - cutoff_level_3) < 0) {
score5_start = 0;
}
- score5_end = (cutoff_level_5 < pairscore) ? cutoff_level_5 : pairscore;
+ score5_end = (cutoff_level_5 < frontier_score) ? cutoff_level_5 : frontier_score;
for (score5 = score5_start; score5 <= score5_end; score5++) {
debug5a(printf("score5 = %d (cutoff %d), score3 = %d (cutoff %d)\n",
- score5,cutoff_level_5,pairscore-score5,cutoff_level_3));
- score3 = pairscore - score5;
+ score5,cutoff_level_5,frontier_score-score5,cutoff_level_3));
+ score3 = frontier_score - score5;
assert(score3 <= cutoff_level_3);
- if (1 || (score5 <= cutoff_level_5 && ((score3 = pairscore - score5) <= cutoff_level_3))) {
+ if (1 || (score5 <= cutoff_level_5 && ((score3 = frontier_score - score5) <= cutoff_level_3))) {
/* Sort this level if necessary: 5' by genomicend, 3' by genomicstart */
if (sorted5p[score5] == false) {
if (nhits5_plus[score5] > 0) {
@@ -15498,7 +15385,7 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
nhits3 = nhits3_plus[score3];
if (nhits5 > 0 && nhits3 > 0) {
debug5(printf("at score %d, nhits5_plus = %d; at score %d, nhits3_plus = %d\n",
- score5,nhits5,score3,nhits3));
+ score5,nhits5,score3,nhits3));
i = j = 0;
while (*abort_pairing_p == false && i < nhits5) {
@@ -15572,15 +15459,21 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*pairtype*/CONCORDANT,splicing_penalty,
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
- if (hit5->start_amb_length > 0 || hit5->end_amb_length > 0 ||
- hit3->start_amb_length > 0 || hit3->end_amb_length > 0) {
+ debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
+#if 0
+ /* Doesn't work with new substrings */
+ if (start_amb_length(hit5) > 0 || end_amb_length(hit5) > 0 ||
+ start_amb_length(hit3) > 0 || end_amb_length(hit3) > 0) {
/* Don't use ambiguous splices to update found_score*/
hitpairs = List_push(hitpairs,(void *) stage3pair);
(*nconcordant)++;
- } else if (pairscore < new_found_score) {
- new_found_score = pairscore;
- debug5(printf(" => tentatively updating found_score to be %d",new_found_score));
+ } /*else*/
+#endif
+ if (hit5->score + hit3->score < new_found_score) {
+ /* Don't use frontier_score here, which is the trimmed_score. Use the full score, to motivate stage1hr to find longer alignments */
+ new_found_score = hit5->score + hit3->score;
+ debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,hit5->score,hit3->score));
hitpairs = List_push(hitpairs,(void *) stage3pair);
(*nconcordant)++;
@@ -15688,15 +15581,21 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
/*pairtype*/CONCORDANT,splicing_penalty,
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
- if (hit5->start_amb_length > 0 || hit5->end_amb_length > 0 ||
- hit3->start_amb_length > 0 || hit3->end_amb_length > 0) {
+ debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
+#if 0
+ /* Doesn't work with new substrings */
+ if (start_amb_length(hit5) > 0 || end_amb_length(hit5) > 0 ||
+ start_amb_length(hit3) > 0 || end_amb_length(hit3) > 0) {
/* Don't use ambiguous splices to update found_score*/
hitpairs = List_push(hitpairs,(void *) stage3pair);
(*nconcordant)++;
- } else if (pairscore < new_found_score) {
- new_found_score = pairscore;
- debug5(printf(" => updating new_found_score to be %d",new_found_score));
+ } /*else*/
+#endif
+ if (hit5->score + hit3->score < new_found_score) {
+ /* Don't use frontier_score here, which is the trimmed_score. Use the full score, to motivate stage1hr to find longer alignments */
+ new_found_score = hit5->score + hit3->score;
+ debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,hit5->score,hit3->score));
hitpairs = List_push(hitpairs,(void *) stage3pair);
(*nconcordant)++;
@@ -15906,16 +15805,17 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
*found_score = new_found_score;
}
- pairscore++;
+ frontier_score++;
}
return hitpairs;
}
+/* Use nmismatches_bothdiff (which is a trimmed_score), instead of stage3end->score */
static int
-sort_hits_by_score (T **hits_plus, T **hits_minus, int *nhits_plus, int *nhits_minus,
- List_T *hitarray, int narray, int cutoff_level) {
+sort_hits_by_trimmed_score (T **hits_plus, T **hits_minus, int *nhits_plus, int *nhits_minus,
+ List_T *hitarray, int narray, int cutoff_level) {
int score;
int nhits, i;
List_T q;
@@ -15928,13 +15828,13 @@ sort_hits_by_score (T **hits_plus, T **hits_minus, int *nhits_plus, int *nhits_m
for (q = hitarray[i]; q != NULL; q = q->rest) {
hit = (T) q->first;
debug5(printf(" : %p score %d, type %s\n",hit,hit->score,hittype_string(hit->hittype)));
- assert(hit->score >= 0);
- if (hit->score > cutoff_level) {
- debug5(printf("Skipping hit with score %d > cutoff level %d\n",hit->score,cutoff_level));
+ assert(hit->nmismatches_bothdiff >= 0);
+ if (hit->nmismatches_bothdiff > cutoff_level) {
+ debug5(printf("Skipping hit with trimmed score %d > cutoff level %d\n",hit->nmismatches_bothdiff,cutoff_level));
} else if (hit->plusp == true) {
- nhits_plus[hit->score]++;
+ nhits_plus[hit->nmismatches_bothdiff]++;
} else {
- nhits_minus[hit->score]++;
+ nhits_minus[hit->nmismatches_bothdiff]++;
}
}
}
@@ -15984,12 +15884,12 @@ sort_hits_by_score (T **hits_plus, T **hits_minus, int *nhits_plus, int *nhits_m
for (i = 0; i < narray; i++) {
for (q = hitarray[i]; q != NULL; q = q->rest) {
hit = (T) q->first;
- if (hit->score > cutoff_level) {
+ if (hit->nmismatches_bothdiff > cutoff_level) {
/* Skip */
} else if (hit->plusp == true) {
- hits_plus[hit->score][nhits_plus[hit->score]++] = hit;
+ hits_plus[hit->nmismatches_bothdiff][nhits_plus[hit->nmismatches_bothdiff]++] = hit;
} else {
- hits_minus[hit->score][nhits_minus[hit->score]++] = hit;
+ hits_minus[hit->nmismatches_bothdiff][nhits_minus[hit->nmismatches_bothdiff]++] = hit;
}
}
}
@@ -16001,9 +15901,8 @@ sort_hits_by_score (T **hits_plus, T **hits_minus, int *nhits_plus, int *nhits_m
/* Finds concordant pairs if nconcordant is 0 */
List_T
Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcordant, int *nsamechr,
- List_T *samechr, List_T *conc_transloc, List_T *with_terminal,
+ List_T *samechr, List_T *conc_transloc,
List_T hitpairs, List_T *hitarray5, int narray5, List_T *hitarray3, int narray3,
- List_T terminals5, List_T terminals3,
int cutoff_level_5, int cutoff_level_3, int subopt_levels,
Univcoord_T *splicesites,
Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
@@ -16028,8 +15927,8 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
hits5_minus = (T **) MALLOCA((cutoff_level_5+1) * sizeof(T *));
nhits5_plus = (int *) CALLOCA(cutoff_level_5+1,sizeof(int));
nhits5_minus = (int *) CALLOCA(cutoff_level_5+1,sizeof(int));
- cutoff_level_hits5 = sort_hits_by_score(hits5_plus,hits5_minus,nhits5_plus,nhits5_minus,
- hitarray5,narray5,cutoff_level_5);
+ cutoff_level_hits5 = sort_hits_by_trimmed_score(hits5_plus,hits5_minus,nhits5_plus,nhits5_minus,
+ hitarray5,narray5,cutoff_level_5);
sorted_hits5_p = (bool *) CALLOCA(cutoff_level_hits5+1,sizeof(bool));
@@ -16038,31 +15937,30 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
hits3_minus = (T **) MALLOCA((cutoff_level_3+1) * sizeof(T *));
nhits3_plus = (int *) CALLOCA(cutoff_level_3+1,sizeof(int));
nhits3_minus = (int *) CALLOCA(cutoff_level_3+1,sizeof(int));
- cutoff_level_hits3 = sort_hits_by_score(hits3_plus,hits3_minus,nhits3_plus,nhits3_minus,
- hitarray3,narray3,cutoff_level_3);
+ cutoff_level_hits3 = sort_hits_by_trimmed_score(hits3_plus,hits3_minus,nhits3_plus,nhits3_minus,
+ hitarray3,narray3,cutoff_level_3);
sorted_hits3_p = (bool *) CALLOCA(cutoff_level_hits3+1,sizeof(bool));
- if (terminals5 == NULL && terminals3 == NULL) {
- /* Look for concordant pairs among the non-terminals */
- hitpairs = pair_up_concordant_aux(&(*abort_pairing_p),&(*found_score),&(*nconcordant),&(*nsamechr),
- &(*samechr),&(*conc_transloc),hitpairs,
- hits5_plus,nhits5_plus,hits5_minus,nhits5_minus,
- hits3_plus,nhits3_plus,hits3_minus,nhits3_minus,
- /*sorted5p*/sorted_hits5_p,/*sorted3p*/sorted_hits3_p,
- cutoff_level_hits5,cutoff_level_hits3,subopt_levels,
- splicesites,query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,querylength5,querylength3,
- maxpairedpaths,splicing_penalty,genestrand);
+ /* Look for concordant pairs among the non-terminals */
+ hitpairs = pair_up_concordant_aux(&(*abort_pairing_p),&(*found_score),&(*nconcordant),&(*nsamechr),
+ &(*samechr),&(*conc_transloc),hitpairs,
+ hits5_plus,nhits5_plus,hits5_minus,nhits5_minus,
+ hits3_plus,nhits3_plus,hits3_minus,nhits3_minus,
+ /*sorted5p*/sorted_hits5_p,/*sorted3p*/sorted_hits3_p,
+ cutoff_level_hits5,cutoff_level_hits3,subopt_levels,
+ splicesites,query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,querylength5,querylength3,
+ maxpairedpaths,splicing_penalty,genestrand);
- } else {
+#if 0
/* Look for single terminals */
if (terminals3 != NULL) {
terminals3_plus = (T **) MALLOCA((cutoff_level_3+1) * sizeof(T *));
terminals3_minus = (T **) MALLOCA((cutoff_level_3+1) * sizeof(T *));
nterminals3_plus = (int *) CALLOCA(cutoff_level_3+1,sizeof(int));
nterminals3_minus = (int *) CALLOCA(cutoff_level_3+1,sizeof(int));
- cutoff_level_terminals3 = sort_hits_by_score(terminals3_plus,terminals3_minus,nterminals3_plus,nterminals3_minus,
- &terminals3,/*narray3*/1,cutoff_level_3);
+ cutoff_level_terminals3 = sort_hits_by_trimmed_score(terminals3_plus,terminals3_minus,nterminals3_plus,nterminals3_minus,
+ &terminals3,/*narray3*/1,cutoff_level_3);
sorted_terminals3_p = (bool *) CALLOCA(cutoff_level_terminals3+1,sizeof(bool));
/* Do not allow terminals to alter found_score */
@@ -16083,8 +15981,8 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
terminals5_minus = (T **) MALLOCA((cutoff_level_5+1) * sizeof(T *));
nterminals5_plus = (int *) CALLOCA(cutoff_level_5+1,sizeof(int));
nterminals5_minus = (int *) CALLOCA(cutoff_level_5+1,sizeof(int));
- cutoff_level_terminals5 = sort_hits_by_score(terminals5_plus,terminals5_minus,nterminals5_plus,nterminals5_minus,
- &terminals5,/*narray5*/1,cutoff_level_5);
+ cutoff_level_terminals5 = sort_hits_by_trimmed_score(terminals5_plus,terminals5_minus,nterminals5_plus,nterminals5_minus,
+ &terminals5,/*narray5*/1,cutoff_level_5);
sorted_terminals5_p = (bool *) CALLOCA(cutoff_level_terminals5+1,sizeof(bool));
/* Do not allow terminals to alter found_score */
@@ -16140,7 +16038,7 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
FREEA(nterminals5_minus);
}
}
-
+#endif
for (score3 = 0; score3 <= cutoff_level_hits3; score3++) {
FREE(hits3_plus[score3]);
@@ -16162,8 +16060,8 @@ Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcor
FREEA(nhits5_plus);
FREEA(nhits5_minus);
- debug5(printf("Finished with Stage3_pair_up_concordant: %d concordant, %d samechr, %d conc_transloc, %d with_terminal\n",
- List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc),List_length(*with_terminal)));
+ debug5(printf("Finished with Stage3_pair_up_concordant: %d concordant, %d samechr, %d conc_transloc\n",
+ List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc)));
return hitpairs;
}
diff --git a/src/stage3hr.h b/src/stage3hr.h
index f1debe6..bfe3461 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage3hr.h 161183 2015-03-18 17:04:33Z twu $ */
+/* $Id: stage3hr.h 166742 2015-06-02 02:00:54Z twu $ */
#ifndef STAGE3HR_INCLUDED
#define STAGE3HR_INCLUDED
@@ -16,16 +16,16 @@
#include "compress.h"
#include "resulthr.h"
#include "substring.h"
+#include "junction.h"
#include "pair.h"
-
+#include "filestring.h"
/* Should arrange in order of goodness, best to worst */
-typedef enum {EXACT, SUB, INSERTION, DELETION,
+typedef enum {EXACT, SUB, INSERTION, DELETION, SUBSTRINGS,
HALFSPLICE_DONOR, HALFSPLICE_ACCEPTOR, SPLICE, SAMECHR_SPLICE, TRANSLOC_SPLICE,
ONE_THIRD_SHORTEXON, TWO_THIRDS_SHORTEXON, SHORTEXON,
GMAP, TERMINAL} Hittype_T;
-
#define T Stage3end_T
typedef struct T *T;
@@ -33,7 +33,8 @@ typedef struct Stage3pair_T *Stage3pair_T;
extern void
-Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in,
+Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome_in,
+ Univ_IIT_T chromosome_iit_in, int nchromosomes_in, int circular_typeint_in,
IIT_T genes_iit_in, int *genes_divint_crosstable_in,
IIT_T tally_iit_in, int *tally_divint_crosstable_in,
IIT_T runlength_iit_in, int *runlength_divint_crosstable_in,
@@ -43,33 +44,11 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in,
int antistranded_penalty_in, bool favor_multiexon_p_in,
int gmap_min_nconsecutive_in, int index1part, int index1interval,
bool novelsplicingp_in, bool merge_samechr_p_in,
- bool *circularp_in, char *failedinput_root_in, bool fastq_format_p_in,
+ bool *circularp_in, char *failedinput_root_in,
bool print_m8_p_in, bool want_random_p_in);
-extern void
-Stage3hr_file_setup_single (FILE *fp_failedinput_in, FILE *fp_nomapping_in,
- FILE *fp_unpaired_uniq_in, FILE *fp_unpaired_circular_in, FILE *fp_unpaired_transloc_in,
- FILE *fp_unpaired_mult_in, FILE *fp_unpaired_mult_xs_1_in);
-
-extern void
-Stage3hr_file_setup_paired (FILE *fp_failedinput_1_in, FILE *fp_failedinput_2_in, FILE *fp_nomapping_in,
- FILE *fp_halfmapping_uniq_in, FILE *fp_halfmapping_circular_in, FILE *fp_halfmapping_transloc_in,
- FILE *fp_halfmapping_mult_in, FILE *fp_halfmapping_mult_xs_1_in, FILE *fp_halfmapping_mult_xs_2_in,
- FILE *fp_paired_uniq_circular_in, FILE *fp_paired_uniq_inv_in, FILE *fp_paired_uniq_scr_in,
- FILE *fp_paired_uniq_long_in, FILE *fp_paired_mult_in, FILE *fp_paired_mult_xs_1_in, FILE *fp_paired_mult_xs_2_in,
- FILE *fp_concordant_uniq_in, FILE *fp_concordant_circular_in, FILE *fp_concordant_transloc_in,
- FILE *fp_concordant_mult_in, FILE *fp_concordant_mult_xs_1_in, FILE *fp_concordant_mult_xs_2_in);
-
-extern void
-Stage3hr_file_setup_all (FILE *fp_failedinput_1_in, FILE *fp_failedinput_2_in, FILE *fp_nomapping_in,
- FILE *fp_unpaired_uniq_in, FILE *fp_unpaired_circular_in, FILE *fp_unpaired_transloc_in,
- FILE *fp_unpaired_mult_in, FILE *fp_unpaired_mult_xs_1_in, FILE *fp_unpaired_mult_xs_2_in,
- FILE *fp_halfmapping_uniq_in, FILE *fp_halfmapping_circular_in, FILE *fp_halfmapping_transloc_in,
- FILE *fp_halfmapping_mult_in, FILE *fp_halfmapping_mult_xs_1_in, FILE *fp_halfmapping_mult_xs_2_in,
- FILE *fp_paired_uniq_circular_in, FILE *fp_paired_uniq_inv_in, FILE *fp_paired_uniq_scr_in,
- FILE *fp_paired_uniq_long_in, FILE *fp_paired_mult_in, FILE *fp_paired_mult_xs_1_in, FILE *fp_paired_mult_xs_2_in,
- FILE *fp_concordant_uniq_in, FILE *fp_concordant_circular_in, FILE *fp_concordant_transloc_in,
- FILE *fp_concordant_mult_in, FILE *fp_concordant_mult_xs_1_in, FILE *fp_concordant_mult_xs_2_in);
+extern char *
+Stage3end_deletion_string (T this);
extern Hittype_T
Stage3end_hittype (T this);
@@ -140,7 +119,7 @@ Stage3end_gmap_end_endtype (T this);
extern int
Stage3end_nindels (T this);
extern int
-Stage3end_indel_pos (T this);
+Stage3end_querylength (T this);
extern bool
Stage3end_plusp (T this);
extern bool
@@ -157,12 +136,24 @@ extern int
Stage3end_circularpos (T this);
+extern char *
+Stage3end_deletion_string (T this);
+
+extern Junction_T
+Stage3end_junctionD (T this);
+extern Junction_T
+Stage3end_junctionA (T this);
+
+extern List_T
+Stage3end_substrings_LtoH (T this);
+extern List_T
+Stage3end_junctions_LtoH (T this);
+
extern Substring_T
Stage3end_substring1 (T this);
extern Substring_T
Stage3end_substring2 (T this);
-extern char *
-Stage3end_deletion_string (T this);
+
extern Substring_T
Stage3end_substring_donor (T this);
extern Substring_T
@@ -172,6 +163,9 @@ Stage3end_substringD (T this);
extern Substring_T
Stage3end_substringA (T this);
extern Substring_T
+Stage3end_substringS (T this);
+
+extern Substring_T
Stage3end_substring_low (T this, int hardclip_low);
extern Substring_T
Stage3end_substring_containing (T this, int querypos);
@@ -179,6 +173,10 @@ extern struct Pair_T *
Stage3end_pairarray (T this);
extern int
Stage3end_npairs (T this);
+extern List_T
+Stage3end_cigar_tokens (T this);
+extern bool
+Stage3end_gmap_intronp (T this);
extern Chrpos_T
Stage3end_distance (T this);
@@ -215,10 +213,6 @@ extern bool
Stage3end_start_ambiguous_p (T this);
extern bool
Stage3end_end_ambiguous_p (T this);
-extern int
-Stage3end_amb_length_start (T this);
-extern int
-Stage3end_amb_length_end (T this);
extern Univcoord_T *
Stage3end_start_ambcoords (T this);
extern Univcoord_T *
@@ -234,6 +228,10 @@ Stage3end_gmap_triedp (T this);
extern void
Stage3end_set_gmap_triedp (T this);
extern int
+Stage3end_substrings_querystart (T this);
+extern int
+Stage3end_substrings_queryend (T this);
+extern int
Stage3end_gmap_querystart (T this);
extern int
Stage3end_gmap_queryend (T this);
@@ -245,8 +243,6 @@ extern bool
Stage3end_contains_known_splicesite (T this);
extern bool
Stage3end_indel_contains_known_splicesite (bool *leftp, bool *rightp, T this);
-extern bool
-Stage3end_bad_stretch_p (T this, Compress_T query_compress_fwd, Compress_T query_compress_rev);
extern bool
Stage3end_genomicbound_from_start (Univcoord_T *genomicbound, T this, int overlap, Univcoord_T chroffset);
@@ -298,6 +294,31 @@ Stage3pair_free (Stage3pair_T *old);
extern T
Stage3end_copy (T old);
+
+extern T
+Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
+#ifdef LARGE_GENOMES
+ Uint8list_T lefts,
+#else
+ Uintlist_T lefts,
+#endif
+ Intlist_T nmismatches_list, List_T junctions, int querylength,
+ Compress_T query_compress,
+ Substring_T right_ambig, Substring_T left_ambig,
+ bool plusp, int genestrand, int sensedir, bool first_read_p,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
+ Chrpos_T chrlength, bool sarrayp);
+extern T
+Stage3end_substrings_run_gmap_plus (T this, char *queryuc_ptr, int querylength,
+ int genestrand, bool first_read_p,
+ int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool);
+extern T
+Stage3end_substrings_run_gmap_minus (T this, char *queryuc_ptr, int querylength,
+ int genestrand, bool first_read_p,
+ int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool);
+
extern T
Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Compress_T query_compress,
bool plusp, int genestrand, bool first_read_p,
@@ -330,7 +351,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
int max_mismatches_allowed, bool sarrayp);
extern T
Stage3end_new_splice (int *found_score, int donor_nmismatches, int acceptor_nmismatches,
- Substring_T donor, Substring_T acceptor, Chrpos_T distance,
+ Substring_T donor, Substring_T acceptor, double donor_prob, double acceptor_prob, Chrpos_T distance,
bool shortdistancep, int splicing_penalty, int querylength, int amb_length, double amb_prob,
#ifdef LARGE_GENOMES
Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
@@ -344,6 +365,7 @@ Stage3end_new_splice (int *found_score, int donor_nmismatches, int acceptor_nmis
bool first_read_p, int sensedir, bool sarrayp);
extern T
Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T acceptor, Substring_T shortexon,
+ double donor_prob, double shortexonA_prob, double shortexonD_prob, double acceptor_prob,
int amb_length_donor, int amb_length_acceptor, double amb_prob_donor, double amb_prob_acceptor,
#ifdef LARGE_GENOMES
Uint8list_T ambcoords_donor, Uint8list_T ambcoords_acceptor,
@@ -354,7 +376,7 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
Intlist_T amb_nmismatches_donor, Intlist_T amb_nmismatches_acceptor,
Doublelist_T amb_probs_donor, Doublelist_T amb_probs_acceptor,
bool copy_donor_p, bool copy_acceptor_p, bool copy_shortexon_p,
- int splicing_penalty, int querylength, int sensedir, bool sarrayp);
+ int splicing_penalty, int querylength, bool first_read_p, int sensedir, bool sarrayp);
extern T
Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_length,
@@ -362,9 +384,10 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
Splicetype_T ambig_splicetype_5, Splicetype_T ambig_splicetype_3,
double ambig_prob_5, double ambig_prob_3, double min_splice_prob,
struct Pair_T *pairarray, int npairs, int nsegments, int nintrons, int nindelbreaks,
- Univcoord_T left, int genomiclength, bool plusp, int genestrand, bool first_read_p, int querylength,
+ Univcoord_T left, int genomiclength, bool plusp, int genestrand, bool first_read_p,
+ char *accession, int querylength,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength,
- int cdna_direction, int sensedir);
+ int cdna_direction, int sensedir, GMAP_source_T gmap_source);
extern List_T
Stage3end_sort_bymatches (List_T hits);
@@ -374,13 +397,15 @@ Stage3end_sort_by_paired_seenp (List_T hits);
extern Stage3end_T *
Stage3end_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
Stage3end_T *stage3array, int maxpaths, Shortread_T queryseq,
+ char *queryuc_ptr, char *queryrc,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
- Genome_T genome, char *quality_string, bool displayp);
+ char *quality_string, bool displayp);
extern Stage3end_T *
Stage3end_eval_and_sort_guided (int *npaths, int *first_absmq, int *second_absmq, Stage3end_T guide,
Stage3end_T *stage3array, int maxpaths, Shortread_T queryseq,
+ char *queryuc_ptr, char *queryrc,
Compress_T query_compress_fwd, Compress_T query_compress_rev,
- Genome_T genome, char *quality_string, bool displayp);
+ char *quality_string, bool displayp);
extern List_T
Stage3pair_remove_excess_terminals (List_T hitpairlist);
extern List_T
@@ -410,7 +435,7 @@ Stage3_determine_pairtype (T hit5, T hit3);
/* If hit5 and hit3 are not NULL, then we know this is part of a pair */
extern void
-Stage3end_print (FILE *fp, T this, int score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
+Stage3end_print (Filestring_T fp, T this, int score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
Shortread_T headerseq, char *acc_suffix, bool invertp,
T hit5, T hit3, int pairedlength, int pairscore,
Pairtype_T pairtype, int mapq_score);
@@ -421,10 +446,12 @@ extern bool
Stage3pair_circularp (Stage3pair_T this);
extern void
-Stage3pair_print (Result_T result, Resulttype_T resulttype,
- Univ_IIT_T chromosome_iit, Shortread_T queryseq1, Shortread_T queryseq2,
- int maxpaths, bool quiet_if_excessive_p,
- bool nofailsp, bool failsonlyp, bool fastq_format_p, int quality_shift);
+Stage3pair_print_end (Filestring_T fp, Filestring_T fp_failedinput,
+ Result_T result, Resulttype_T resulttype,
+ char initchar, bool firstp, Univ_IIT_T chromosome_iit,
+ Shortread_T queryseq, Shortread_T headerseq1, Shortread_T headerseq2,
+ int maxpaths, bool quiet_if_excessive_p,
+ bool invertp, int quality_shift);
extern Stage3pair_T
Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
@@ -436,7 +463,7 @@ Stage3pair_new (T hit5, T hit3, Univcoord_T *splicesites,
struct Pair_T *
Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged, char **quality_merged,
Stage3pair_T this, Shortread_T queryseq5, Shortread_T queryseq3,
- int querylength5, int querylength3, int clipdir,
+ int querylength_5, int querylength_3, int clipdir,
int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high);
extern void
@@ -455,9 +482,11 @@ extern Stage3pair_T *
Stage3pair_eval_and_sort (int *npaths, int *first_absmq, int *second_absmq,
Stage3pair_T *stage3pairarray, int maxpaths,
Shortread_T queryseq1, Shortread_T queryseq2,
+ char *queryuc_ptr_5, char *queryrc5,
+ char *queryuc_ptr_3, char *queryrc3,
Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
- Genome_T genome, char *quality_string_5, char *quality_string_3);
+ char *quality_string_5, char *quality_string_3);
extern List_T
Stage3pair_optimal_score (List_T hitpairlist, int cutoff_level, int suboptimal_mismatches,
@@ -475,9 +504,8 @@ Stage3pair_remove_circular_alias (List_T hitpairlist);
extern List_T
Stage3_pair_up_concordant (bool *abort_pairing_p, int *found_score, int *nconcordant, int *nsamechr,
- List_T *samechr, List_T *conc_transloc, List_T *with_terminal,
+ List_T *samechr, List_T *conc_transloc,
List_T hitpairs, List_T *hitarray5, int narray5, List_T *hitarray3, int narray3,
- List_T terminals5, List_T terminals3,
int cutoff_level_5, int cutoff_level_3, int subopt_levels,
Univcoord_T *splicesites,
Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
diff --git a/src/substring.c b/src/substring.c
index e794c0c..4d72940 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 161665 2015-03-23 00:03:33Z twu $";
+static char rcsid[] = "$Id: substring.c 166827 2015-06-03 06:55:46Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -17,8 +17,8 @@ static char rcsid[] = "$Id: substring.c 161665 2015-03-23 00:03:33Z twu $";
#include "complement.h"
#include "genome128_hr.h"
#include "mapq.h"
-#include "pair.h" /* For Pair_print_gsnap */
#include "pairdef.h" /* For State_T */
+#include "pair.h" /* For Pair_print_gsnap */
#include "comp.h"
@@ -236,18 +236,14 @@ struct T {
Univcoord_T chrhigh;
Chrpos_T chrlength;
- Univcoord_T left_genomicseg; /* left needed to retrieve genomicseg */
- Univcoord_T left; /* adjusted by Substring_new for aligndiff */
+ Univcoord_T left; /* for plus: alignstart - querystart(orig). for
+ minus: alignend - (querylength -
+ queryend(orig)). Set when substring is created
+ or made unambiguous, and remains constant */
+
Univcoord_T genomicstart; /* For region corresponding to entire querylength (if extrapolated) */
Univcoord_T genomicend;
- Univcoord_T genomicstart_adj; /* Needed by --clip-overlap and
- --merge-overlap when +/- querypos.
- Differs only for indels */
- Univcoord_T genomicend_adj; /* Needed by --clip-overlap and
- --merge-overlap when +/- querypos.
- Differs only for indels */
-
Endtype_T start_endtype;
Endtype_T end_endtype;
@@ -255,6 +251,7 @@ struct T {
int queryend_orig;
int querystart; /* For part that aligns to genome, post-trim */
int queryend;
+ int amb_splice_pos; /* Used for ambiguous substrings */
int querylength;
Univcoord_T alignstart; /* For part that aligns to genome, including part that is trimmed (pre-trim) */
@@ -263,11 +260,7 @@ struct T {
Univcoord_T alignstart_trim; /* For part that aligns to genome, excluding part that is trimmed (post-trim) */
Univcoord_T alignend_trim;
- int alignoffset;
- int extraleft;
- int extraright;
-
- int genomiclength;
+ int genomiclength; /* Can replace with querylength */
bool plusp;
int genestrand;
bool first_read_p;
@@ -290,12 +283,12 @@ struct T {
float mapq_loglik;
/* for splices */
- bool chimera_sensep;
+ int chimera_sensedir;
Univcoord_T splicecoord;
int splicesites_knowni; /* Needed for intragenic_splice_p in stage1hr.c */
- bool chimera_knownp;
+ bool chimera_knownp; /* Used for computing Substring_nchimera_known */
bool chimera_novelp;
Univcoord_T chimera_modelpos;
int chimera_pos;
@@ -311,6 +304,18 @@ struct T {
Univcoord_T chimera_modelpos_2;
int chimera_pos_2;
double chimera_prob_2;
+
+ double siteA_prob;
+ double siteD_prob;
+
+ bool ambiguous_p;
+ int nambcoords;
+ Univcoord_T *ambcoords;
+ int *amb_knowni;
+ int *amb_nmismatches;
+ double *amb_probs;
+ double amb_common_prob;
+ bool amb_donor_common_p;
};
@@ -321,12 +326,9 @@ Substring_alias_circular (T this) {
if (this != NULL) {
chrlength = this->chrlength;
- this->left_genomicseg += chrlength;
this->left += chrlength;
this->genomicstart += chrlength;
this->genomicend += chrlength;
- this->genomicstart_adj += chrlength;
- this->genomicend_adj += chrlength;
this->alignstart += chrlength;
this->alignend += chrlength;
this->alignstart_trim += chrlength;
@@ -346,12 +348,9 @@ Substring_unalias_circular (T this) {
if (this != NULL) {
chrlength = this->chrlength;
- this->left_genomicseg -= chrlength;
this->left -= chrlength;
this->genomicstart -= chrlength;
this->genomicend -= chrlength;
- this->genomicstart_adj -= chrlength;
- this->genomicend_adj -= chrlength;
this->alignstart -= chrlength;
this->alignend -= chrlength;
this->alignstart_trim -= chrlength;
@@ -613,6 +612,8 @@ trim_left_end (Compress_T query_compress, Univcoord_T left, int querystart, int
trim5 = 0;
if (plusp == true) {
+ debug8(printf("Calling Genome_mismatches_right_trim with left %u, pos5 %d, pos3 %d\n",
+ left,querystart,queryend));
nmismatches = Genome_mismatches_right_trim(mismatch_positions,/*max_mismatches*/alignlength,
query_compress,left,/*pos5*/querystart,/*pos3*/queryend,
plusp,genestrand,first_read_p);
@@ -642,6 +643,8 @@ trim_left_end (Compress_T query_compress, Univcoord_T left, int querystart, int
}
} else {
+ debug8(printf("Calling Genome_mismatches_left_trim with left %u, pos5 %d - %d, pos3 %d - %d\n",
+ left,querylength,queryend,querylength,querystart));
nmismatches = Genome_mismatches_left_trim(mismatch_positions,/*max_mismatches*/alignlength,
query_compress,left,/*pos5*/querylength - queryend,
/*pos3*/querylength - querystart,plusp,genestrand,first_read_p);
@@ -1003,19 +1006,25 @@ Substring_bad_stretch_p (T this, Compress_T query_compress_fwd, Compress_T query
}
-
-
-
void
Substring_free (T *old) {
- if ((*old)->genomic_bothdiff != NULL) {
- if ((*old)->genomic_refdiff != (*old)->genomic_bothdiff) {
- FREE_OUT((*old)->genomic_refdiff);
+ if (*old) {
+ debug2(printf("Freeing substring %p\n",*old));
+ if ((*old)->nambcoords > 0) {
+ FREE((*old)->ambcoords);
+ FREE((*old)->amb_knowni);
+ FREE((*old)->amb_nmismatches);
+ FREE((*old)->amb_probs);
+ }
+ if ((*old)->genomic_bothdiff != NULL) {
+ if ((*old)->genomic_refdiff != (*old)->genomic_bothdiff) {
+ FREE_OUT((*old)->genomic_refdiff);
+ }
+ FREE_OUT((*old)->genomic_bothdiff);
}
- FREE_OUT((*old)->genomic_bothdiff);
- }
- FREE_OUT(*old);
+ FREE_OUT(*old);
+ }
return;
}
@@ -1141,11 +1150,11 @@ Chrpos_T
Substring_insert_length (T substring5, T substring3) {
Univcoord_T pos5, pos3;
- pos5 = substring5->genomicstart_adj;
- debug3(printf("pos5 %u\n",substring5->genomicstart_adj));
+ pos5 = substring5->genomicstart;
+ debug3(printf("pos5 %u\n",substring5->genomicstart));
- pos3 = substring3->genomicend_adj;
- debug3(printf("pos3 %u\n",substring3->genomicend_adj));
+ pos3 = substring3->genomicend;
+ debug3(printf("pos3 %u\n",substring3->genomicend));
if (pos5 > pos3) {
return pos5 - pos3;
@@ -1283,7 +1292,6 @@ mark_mismatches_cmet_gsnap (char *gbuffer, char *query, int start, int end, int
int i;
debug1(printf("\n"));
- debug1(printf("first_read_p %d\n",first_read_p));
debug1(printf("query: %s\n",query));
debug1(printf("genome: %s\n",gbuffer));
debug1(printf("count: "));
@@ -1502,19 +1510,28 @@ Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in,
static char *
embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend, int querylength,
- int alignoffset, int extraleft, int extraright, int genestrand) {
+ int extraleft, int extraright, int genestrand) {
char *result;
int i, j, k;
+ debug1(printf("Entered embellish_genomic with querystart %d, queryend %d, querylength %d, genomic_diff %s\n",
+ querystart,queryend,querylength,genomic_diff));
+
+#ifdef DEBUG1
+ result = (char *) CALLOC_OUT(querylength+1,sizeof(char));
+#else
result = (char *) MALLOC_OUT((querylength+1) * sizeof(char));
+#endif
result[querylength] = '\0';
/* Add aligned region with lower-case diffs, surrounded by dashes */
fill_w_dashes(result,0,querystart);
- debug2(printf("g1: %s\n",result));
+ debug1(printf("g1: %s (%d..%d) extraleft:%d extraright:%d\n",result,querystart,queryend,extraleft,extraright));
- strncpy(&(result[querystart]),&(genomic_diff[alignoffset]),queryend-querystart);
- debug2(printf("g1: %s\n",result));
+ /* Don't need to know adj anymore, because each substring has its own left */
+ debug1(printf("Copying from genomic_diff[%d] to result[%d] for a length of %d - %d\n",querystart,querystart,queryend,querystart));
+ strncpy(&(result[querystart]),&(genomic_diff[querystart]),queryend-querystart);
+ debug1(printf("g1: %s (%d..%d) extraleft:%d extraright:%d\n",result,querystart,queryend,extraleft,extraright));
if (mode == STANDARD) {
/* Skip */
@@ -1527,24 +1544,23 @@ embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend
}
fill_w_dashes(result,queryend,querylength);
- debug2(printf("g1: %s\n",result));
+ debug1(printf("g1: %s\n",result));
/* Add terminal ends as lower-case */
- for (k = 0, i = querystart-1, j = alignoffset-1; k < extraleft && i >= 0; k++, i--, j--) {
+ for (k = 0, i = querystart-1, j = querystart-1; k < extraleft && i >= 0 /*&& j >= 0*/; k++, i--, j--) {
result[i] = (char) tolower(genomic_diff[j]);
-#if 0
- printf("k=%d i=%d result[i]=%c\n",k,i,result[i]);
-#endif
+ /* printf("k=%d i=%d result[i]=%c\n",k,i,result[i]); */
assert(result[i] == 'a' || result[i] == 'c' || result[i] == 'g' || result[i] == 't' || result[i] == 'n');
- }
- for (k = 0, i = queryend, j = alignoffset+queryend-querystart; k < extraright && i < querylength; k++, i++, j++) {
+ }
+
+ for (k = 0, i = queryend, j = queryend; k < extraright && i < querylength /*&& j < genomiclength*/; k++, i++, j++) {
result[i] = (char) tolower(genomic_diff[j]);
+ /* printf("k=%d i=%d result[i]=%c\n",k,i,result[i]); */
#if 0
- printf("k=%d i=%d result[i]=%c\n",k,i,result[i]);
-#endif
assert(result[i] == 'a' || result[i] == 'c' || result[i] == 'g' || result[i] == 't' || result[i] == 'n');
+#endif
}
- debug2(printf("g1: %s\n",result));
+ debug1(printf("g1: %s\n",result));
return result;
}
@@ -1552,14 +1568,14 @@ embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend
static char *
embellish_genomic_sam (char *genomic_diff, char *query, int querystart, int queryend, int querylength,
- int genomiclength, int alignoffset, int genestrand) {
+ int genomiclength, int genestrand, bool exactp) {
char *result;
int i, j, k;
result = (char *) MALLOC_OUT((querylength+1) * sizeof(char));
result[querylength] = '\0';
- strncpy(&(result[querystart]),&(genomic_diff[alignoffset]),queryend-querystart);
+ strncpy(&(result[querystart]),&(genomic_diff[querystart]),queryend-querystart);
if (mode == STANDARD) {
/* Skip */
@@ -1572,51 +1588,45 @@ embellish_genomic_sam (char *genomic_diff, char *query, int querystart, int quer
}
/* Add terminal ends as lower-case */
- for (k = 0, i = querystart-1, j = alignoffset-1; i >= 0 && j >= 0; k++, i--, j--) {
+ for (k = 0, i = querystart-1, j = querystart-1; i >= 0 && j >= 0; k++, i--, j--) {
if (query[i] == genomic_diff[j]) {
result[i] = genomic_diff[j];
} else {
result[i] = (char) tolower(genomic_diff[j]);
}
-#if 0
- printf("k=%d i=%d j=%d result[i]=%c\n",k,i,j,result[i]);
-#endif
+ /* printf("k=%d i=%d j=%d result[i]=%c\n",k,i,j,result[i]); */
}
- for (k = 0, i = queryend, j = alignoffset+queryend-querystart; i < querylength && j < genomiclength; k++, i++, j++) {
- if (query[i] == genomic_diff[j]) {
- result[i] = genomic_diff[j];
- } else {
- result[i] = (char) tolower(genomic_diff[j]);
+ if (exactp == true) {
+ /* No need to mark mismatches */
+ } else {
+ for (k = 0, i = queryend, j = queryend; i < querylength && j < genomiclength; k++, i++, j++) {
+ if (query[i] == genomic_diff[j]) {
+ result[i] = genomic_diff[j];
+ assert(result[i] == 'A' || result[i] == 'C' || result[i] == 'G' || result[i] == 'T' || result[i] == 'N');
+ } else {
+ result[i] = (char) tolower(genomic_diff[j]);
+ assert(result[i] == 'a' || result[i] == 'c' || result[i] == 'g' || result[i] == 't' || result[i] == 'n');
+ }
+ /* printf("k=%d i=%d j=%d result[i]=%c\n",k,i,j,result[i]); */
}
-#if 0
- printf("k=%d i=%d j=%d result[i]=%c\n",k,i,j,result[i]);
-#endif
}
return result;
}
-
-
-
-/************************************************************************/
-
-
+/* Want querylength and not querylength_adj */
T
Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
- Univcoord_T chrhigh, Chrpos_T chrlength, Univcoord_T left,
- Univcoord_T genomicstart, Univcoord_T genomicend,
- Univcoord_T genomicstart_adj, Univcoord_T genomicend_adj,
+ Univcoord_T chrhigh, Chrpos_T chrlength,
Compress_T query_compress, Endtype_T start_endtype, Endtype_T end_endtype,
int querystart, int queryend, int querylength,
Univcoord_T alignstart, Univcoord_T alignend, int genomiclength,
- int extraleft, int extraright, bool exactp,
- bool plusp, int genestrand, bool first_read_p,
- bool trim_left_p, bool trim_right_p, int minlength) {
+ bool exactp, bool plusp, int genestrand, bool first_read_p,
+ bool trim_left_p, bool trim_right_p, int outofbounds_start, int outofbounds_end,
+ int minlength) {
T new;
- int aligndiff;
int nmatches;
double prob1, prob2;
int nonterminal_trim = 0;
@@ -1624,12 +1634,15 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
/* General test for goodness over original region */
nmatches = queryend - querystart - nmismatches_whole;
+ debug2(printf("Initial nmatches %d = queryend %d - querystart %d - nmismatches_whole %d\n",
+ nmatches,queryend,querystart,nmismatches_whole));
if (nmatches - 3*nmismatches_whole < 0) {
- debug2(printf("Substring fails general test for goodness with %d matches and %d mismatches\n",
+ debug2(printf("Substring fails general test 1 for goodness with %d matches and %d mismatches\n",
nmatches,nmismatches_whole));
return (T) NULL;
} else {
new = (T) MALLOC_OUT(sizeof(*new));
+ debug2(printf("substring %p:\n",new));
}
new->exactp = exactp;
@@ -1639,66 +1652,80 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->chrhigh = chrhigh;
new->chrlength = chrlength;
- new->left_genomicseg = left;
- new->genomicstart = genomicstart;
- new->genomicend = genomicend;
+ /* new->genomicstart = genomicstart; */
+ /* new->genomicend = genomicend; */
+#if 0
new->genomicstart_adj = genomicstart_adj;
new->genomicend_adj = genomicend_adj;
+#endif
new->start_endtype = start_endtype;
new->end_endtype = end_endtype;
new->querystart_orig = new->querystart = querystart;
new->queryend_orig = new->queryend = queryend;
+ new->amb_splice_pos = 0;
new->querylength = querylength;
new->alignstart = new->alignstart_trim = alignstart;
new->alignend = new->alignend_trim = alignend;
- new->extraleft = extraleft;
- new->extraright = extraright;
-
new->genomiclength = genomiclength;
new->plusp = plusp;
new->genestrand = genestrand;
new->first_read_p = first_read_p;
+ new->chimera_prob = 0.0;
new->chimera_knownp = false;
new->chimera_knownp_2 = false;
new->chimera_novelp = false;
new->chimera_novelp_2 = false;
- debug2(printf("Entered Substring_new with chrnum %d (chroffset %u, chrhigh %u)\n",
- chrnum,chroffset,chrhigh));
+ debug2(printf("\n***Entered Substring_new with query %d..%d, chrnum %d (chroffset %u, chrhigh %u), plusp %d\n",
+ querystart,queryend,chrnum,chroffset,chrhigh,plusp));
/* Compute coordinates */
if (plusp == true) {
- new->alignoffset = alignstart - genomicstart;
- aligndiff = /* (alignstart - genomicstart) - querystart = */ new->alignoffset - querystart;
- left += aligndiff;
- new->left = left;
-
- debug2(printf("\n"));
- debug2(printf("querylength is %d, genomiclength is %d, alignstart is %u, alignend is %u, genomicstart is %u, genomicend is %u, alignoffset is %d\n",
- querylength,genomiclength,alignstart,alignend,genomicstart,genomicend,new->alignoffset));
+ new->left = alignstart - querystart;
+ new->genomicstart = new->left;
+ new->genomicend = new->left + querylength;
+
+ debug2(printf("left is %u\n",new->left));
+ debug2(printf("querylength is %d, genomiclength is %d, alignstart is %u, alignend is %u\n",
+ querylength,genomiclength,alignstart,alignend));
+ assert(alignstart + outofbounds_start >= chroffset);
+ assert(alignend - outofbounds_end <= chrhigh);
} else {
- new->alignoffset = genomicstart - alignstart;
- aligndiff = (alignend - genomicend) - (querylength - queryend);
- left += aligndiff;
- new->left = left;
+ new->left = alignend - (querylength - queryend);
+ new->genomicend = new->left;
+ new->genomicstart = new->left + querylength;
- debug2(printf("\n"));
- debug2(printf("querylength is %d, genomiclength is %d, alignstart is %u, alignend is %u, genomicstart is %u, genomicend is %u, alignoffset is %d\n",
- querylength,genomiclength,alignstart,alignend,genomicstart,genomicend,new->alignoffset));
+ debug2(printf("left is %u\n",new->left));
+ debug2(printf("querylength is %d, genomiclength is %d, alignstart is %u, alignend is %u\n",
+ querylength,genomiclength,alignstart,alignend));
+ assert(alignstart - outofbounds_start <= chrhigh);
+ assert(alignend + outofbounds_end >= chroffset);
+ }
+
+ if (nmismatches_whole < 0) {
+ debug2(printf("Counting mismatches from querystart %d to queryend %d\n",querystart,queryend));
+ if (plusp == true) {
+ nmismatches_whole =
+ Genome_count_mismatches_substring(query_compress,new->left,/*pos5*/querystart,
+ /*pos3*/queryend,/*plusp*/true,genestrand,first_read_p);
+ } else {
+ nmismatches_whole =
+ Genome_count_mismatches_substring(query_compress,new->left,/*pos5*/querylength - queryend,
+ /*pos3*/querylength - querystart,/*plusp*/false,genestrand,first_read_p);
+ }
}
- assert(alignstart <= chrhigh);
- assert(alignend <= chrhigh);
/* Assign new->nmismatches_whole */
new->nmismatches_whole = nmismatches_whole;
- /* Initialize these so Substring_free knows what to do */
+ /* Initialize these so that Substring_free knows what to do if Substring_new aborts early */
+ new->nambcoords = 0;
new->genomic_bothdiff = (char *) NULL;
new->genomic_refdiff = (char *) NULL;
@@ -1706,30 +1733,56 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
debug8(printf("trim_left_p %d, trim_right_p %d\n",trim_left_p,trim_right_p));
if (trim_left_p == false) {
- new->trim_left = 0;
+ if (outofbounds_start > 0) {
+ /* Not allowed to trim */
+ debug2(printf("outofbounds_start %d > 0, so returning NULL\n",outofbounds_start));
+ Substring_free(&new);
+ return (T) NULL;
+ } else {
+ new->trim_left = 0;
+ }
} else if (new->start_endtype == TERM) {
/* Accept true terminals generated by GSNAP procedure */
- new->trim_left = trim_left_end(query_compress,left,querystart,queryend,querylength,plusp,genestrand,first_read_p,
+ new->trim_left = trim_left_end(query_compress,new->left,querystart,queryend,querylength,plusp,genestrand,first_read_p,
/*trim_mismatch_score*/-3);
+ if (outofbounds_start > new->trim_left) {
+ new->trim_left = outofbounds_start;
+ }
} else {
- new->trim_left = trim_left_end(query_compress,left,querystart,queryend,querylength,plusp,genestrand,first_read_p,
+ new->trim_left = trim_left_end(query_compress,new->left,querystart,queryend,querylength,plusp,genestrand,first_read_p,
trim_mismatch_score);
+ if (outofbounds_start > new->trim_left) {
+ new->trim_left = outofbounds_start;
+ }
nonterminal_trim += new->trim_left;
}
if (trim_right_p == false) {
- new->trim_right = 0;
+ if (outofbounds_end > 0) {
+ /* Not allowed to trim */
+ Substring_free(&new);
+ debug2(printf("outofbounds_end %d > 0, so returning NULL\n",outofbounds_end));
+ return (T) NULL;
+ } else {
+ new->trim_right = 0;
+ }
} else if (new->end_endtype == TERM) {
/* Accept true terminals generated by GSNAP procedure */
- new->trim_right = trim_right_end(query_compress,left,querystart,queryend,querylength,plusp,genestrand,first_read_p,
+ new->trim_right = trim_right_end(query_compress,new->left,querystart,queryend,querylength,plusp,genestrand,first_read_p,
/*trim_mismatch_score*/-3);
+ if (outofbounds_end > new->trim_right) {
+ new->trim_right = outofbounds_end;
+ }
} else {
- new->trim_right = trim_right_end(query_compress,left,querystart,queryend,querylength,plusp,genestrand,first_read_p,
+ new->trim_right = trim_right_end(query_compress,new->left,querystart,queryend,querylength,plusp,genestrand,first_read_p,
trim_mismatch_score);
+ if (outofbounds_end > new->trim_right) {
+ new->trim_right = outofbounds_end;
+ }
nonterminal_trim += new->trim_right;
}
@@ -1753,6 +1806,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
}
}
+
/* ? Should we spend the time to determine trim_left_splicep and
trim_right_splicep, especially since trimming may not be perfect */
if (plusp == true) {
@@ -1761,15 +1815,18 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->alignstart_trim += new->trim_left;
new->alignend_trim -= new->trim_right;
-
+ debug2(printf("Got trims of %d and %d => Revised alignstart_trim and alignend_trim to be %u..%u (%u..%u)\n",
+ new->trim_left,new->trim_right,new->alignstart_trim,new->alignend_trim,
+ new->alignstart_trim - new->chroffset,new->alignend_trim - new->chroffset));
+
if (novelsplicingp == false) {
new->trim_left_splicep = new->trim_right_splicep = false;
} else {
if (new->trim_left == 0) {
new->trim_left_splicep = false;
} else {
- prob1 = Maxent_hr_acceptor_prob(left + new->trim_left,chroffset);
- prob2 = Maxent_hr_antidonor_prob(left + new->trim_left,chroffset);
+ prob1 = Maxent_hr_acceptor_prob(new->left + new->trim_left,chroffset);
+ prob2 = Maxent_hr_antidonor_prob(new->left + new->trim_left,chroffset);
/* fprintf(stderr,"At %u, acceptor prob %f, antidonor prob %f\n",left+new->trim_left,prob1,prob2); */
if (prob1 > 0.90 || prob2 > 0.90) {
new->trim_left_splicep = true;
@@ -1781,8 +1838,8 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
if (new->trim_right == 0) {
new->trim_right_splicep = false;
} else {
- prob1 = Maxent_hr_donor_prob(left + querylength - new->trim_right,chroffset);
- prob2 = Maxent_hr_antiacceptor_prob(left + querylength - new->trim_right,chroffset);
+ prob1 = Maxent_hr_donor_prob(new->left + querylength - new->trim_right,chroffset);
+ prob2 = Maxent_hr_antiacceptor_prob(new->left + querylength - new->trim_right,chroffset);
/* fprintf(stderr,"At %u, donor prob %f, antiacceptor prob %f\n",left + querylength - new->trim_right,prob1,prob2); */
if (prob1 > 0.90 || prob2 > 0.90) {
new->trim_right_splicep = true;
@@ -1798,6 +1855,10 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
new->alignstart_trim -= new->trim_left;
new->alignend_trim += new->trim_right;
+ debug2(printf("Revised alignstart_trim and alignend_trim to be %u..%u (%u..%u)\n",
+ new->alignstart_trim,new->alignend_trim,
+ new->alignstart_trim - new->chroffset,new->alignend_trim - new->chroffset));
+
if (novelsplicingp == false) {
new->trim_left_splicep = new->trim_right_splicep = false;
@@ -1805,9 +1866,9 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
if (new->trim_left == 0) {
new->trim_left_splicep = false;
} else {
- prob1 = Maxent_hr_donor_prob(left + querylength - new->trim_left,chroffset);
- prob2 = Maxent_hr_antiacceptor_prob(left + querylength - new->trim_left,chroffset);
- /* fprintf(stderr,"At %u, donor prob %f, antiacceptor prob %f\n",left + querylength - new->trim_left,prob1,prob2); */
+ prob1 = Maxent_hr_donor_prob(new->left + querylength - new->trim_left,chroffset);
+ prob2 = Maxent_hr_antiacceptor_prob(new->left + querylength - new->trim_left,chroffset);
+ /* fprintf(stderr,"At %u, donor prob %f, antiacceptor prob %f\n",new->left + querylength - new->trim_left,prob1,prob2); */
if (prob1 > 0.90 || prob2 > 0.90) {
new->trim_left_splicep = true;
} else {
@@ -1818,9 +1879,9 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
if (new->trim_right == 0) {
new->trim_right_splicep = false;
} else {
- prob1 = Maxent_hr_acceptor_prob(left + new->trim_right,chroffset);
- prob2 = Maxent_hr_antidonor_prob(left + new->trim_right,chroffset);
- /* fprintf(stderr,"At %u, acceptor prob %f, antidonor prob %f\n",left+new->trim_right,prob1,prob2); */
+ prob1 = Maxent_hr_acceptor_prob(new->left + new->trim_right,chroffset);
+ prob2 = Maxent_hr_antidonor_prob(new->left + new->trim_right,chroffset);
+ /* fprintf(stderr,"At %u, acceptor prob %f, antidonor prob %f\n",new->left+new->trim_right,prob1,prob2); */
if (prob1 > 0.90 || prob2 > 0.90) {
new->trim_right_splicep = true;
} else {
@@ -1837,31 +1898,229 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
} else if (plusp == true) {
new->nmismatches_bothdiff =
- Genome_count_mismatches_substring(query_compress,left,/*pos5*/new->alignstart_trim-left,
- /*pos3*/new->alignend_trim-left,plusp,genestrand,first_read_p);
+ Genome_count_mismatches_substring(query_compress,new->left,/*pos5*/new->alignstart_trim-new->left,
+ /*pos3*/new->alignend_trim - new->left,plusp,genestrand,first_read_p);
} else {
new->nmismatches_bothdiff =
- Genome_count_mismatches_substring(query_compress,left,/*pos5*/new->alignend_trim-left,
- /*pos3*/new->alignstart_trim-left,plusp,genestrand,first_read_p);
+ Genome_count_mismatches_substring(query_compress,new->left,/*pos5*/new->alignend_trim - new->left,
+ /*pos3*/new->alignstart_trim - new->left,plusp,genestrand,first_read_p);
}
/* General test for goodness over trimmed region */
nmatches = new->queryend - new->querystart - new->nmismatches_bothdiff;
+ debug2(printf("nmatches %d = queryend %d - querystart %d - nmismatches_bothdiff %d\n",
+ nmatches,new->queryend,new->querystart,new->nmismatches_bothdiff));
if (nmatches - 3*new->nmismatches_bothdiff < 0) {
- debug2(printf("Substring fails general test for goodness with %d matches and %d mismatches\n",
+ debug2(printf("Substring fails general test 2 for goodness with %d matches and %d mismatches\n",
nmatches,new->nmismatches_bothdiff));
Substring_free(&new);
return (T) NULL;
}
+ new->ambiguous_p = false;
+ new->nambcoords = 0;
+ new->ambcoords = (Univcoord_T *) NULL;
+ new->amb_knowni = (int *) NULL;
+ new->amb_nmismatches = (int *) NULL;
+ new->amb_probs = (double *) NULL;
+ new->amb_common_prob = 0.0;
+ new->amb_donor_common_p = false;
+
+ debug2(printf("Returning substring %p\n",new));
+ return new;
+}
+
+
+T
+Substring_new_ambig (int querystart, int queryend, int splice_pos, int querylength,
+ Chrnum_T chrnum, Univcoord_T chroffset,
+ Univcoord_T chrhigh, Chrpos_T chrlength,
+ int genomiclength, bool plusp, int genestrand, bool first_read_p,
+#ifdef LARGE_GENOMES
+ Uint8list_T ambcoords,
+#else
+ Uintlist_T ambcoords,
+#endif
+ Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs,
+ double amb_common_prob, bool amb_donor_common_p, bool substring1p) {
+ int ignore;
+ T new = (T) MALLOC(sizeof(*new));
+
+ debug2(printf("Entered Substring_new_ambig with chrnum %d (chroffset %u, chrhigh %u), %d..%d, querylength %d, plusp %d\n",
+ chrnum,chroffset,chrhigh,querystart,queryend,querylength,plusp));
+
+ new->exactp = false;
+
+ new->chrnum = chrnum;
+ new->chroffset = chroffset;
+ new->chrhigh = chrhigh;
+ new->chrlength = chrlength;
+
+ new->left = 0;
+#ifdef LARGE_GENOMES
+ if (plusp == true) {
+ new->genomicstart = Uint8list_max(ambcoords);
+ new->genomicend = Uint8list_min(ambcoords);
+ } else {
+ new->genomicstart = Uint8list_min(ambcoords);
+ new->genomicend = Uint8list_max(ambcoords);
+ }
+#else
+ if (plusp == true) {
+ new->genomicstart = Uintlist_max(ambcoords);
+ new->genomicend = Uintlist_min(ambcoords);
+ } else {
+ new->genomicstart = Uintlist_min(ambcoords);
+ new->genomicend = Uintlist_max(ambcoords);
+ }
+#endif
+ new->start_endtype = END;
+ new->end_endtype = END;
+
+ new->querystart_orig = new->querystart = querystart;
+ new->queryend_orig = new->queryend = queryend;
+ new->amb_splice_pos = splice_pos;
+ new->querylength = querylength;
+
+ new->alignstart = new->alignstart_trim = 0;
+ new->alignend = new->alignend_trim = 0;
+
+ new->genomiclength = genomiclength;
+ new->plusp = plusp;
+ new->genestrand = genestrand;
+ new->first_read_p = first_read_p;
+
+ new->chimera_prob = 0.0;
+ new->chimera_knownp = false;
+ new->chimera_knownp_2 = false;
+ new->chimera_novelp = false;
+ new->chimera_novelp_2 = false;
+
+ new->nmismatches_bothdiff = new->nmismatches_whole = Intlist_min(amb_nmismatches);
+
+#if 0
+ if (plusp == true) {
+ /* Fails because alignstart and alignend are not known */
+ new->nmatches = (new->alignend_trim - new->alignstart_trim) - new->nmismatches_whole;
+ } else {
+ new->alignoffset = querylength - queryend;
+ /* Fails because alignstart and alignend are not known */
+ new->nmatches = (new->alignstart_trim - new->alignend_trim) - new->nmismatches_whole;
+ }
+#endif
+ new->nmatches = (queryend - querystart) - new->nmismatches_whole;
+
+ new->genomic_bothdiff = (char *) NULL;
+ new->genomic_refdiff = (char *) NULL;
+ if (substring1p == true) {
+ new->trim_left = querystart;
+ new->trim_right = 0;
+ } else {
+ new->trim_left = 0;
+ new->trim_right = querylength - queryend;
+ }
+ new->trim_left_splicep = new->trim_right_splicep = false;
+
+
+ new->ambiguous_p = true;
+#ifdef LARGE_GENOMES
+ new->ambcoords = Uint8list_to_array_out(&new->nambcoords,ambcoords);
+#else
+ new->ambcoords = Uintlist_to_array_out(&new->nambcoords,ambcoords);
+ debug2(printf("ambcoords: %s\n",Uintlist_to_string(ambcoords)));
+#endif
+ new->amb_knowni = Intlist_to_array_out(&ignore,amb_knowni);
+ new->amb_nmismatches = Intlist_to_array_out(&ignore,amb_nmismatches);
+ new->amb_probs = Doublelist_to_array_out(&ignore,amb_probs);
+ new->amb_common_prob = amb_common_prob;
+ new->amb_donor_common_p = amb_donor_common_p;
+
return new;
}
+#if 0
+void
+Substring_unambiguous_bounds (Univcoord_T *genomicstart, Univcoord_T *genomicend, T this, int bingoi) {
+ Univcoord_T splicecoord, left;
+
+ splicecoord = this->ambcoords[bingoi];
+
+ if (this->plusp == true) {
+ left = splicecoord - this->amb_splice_pos;
+ *genomicstart = left;
+ *genomicend = left + this->querylength;
+
+ } else {
+ left = splicecoord - (this->querylength - this->amb_splice_pos);
+ *genomicend = left;
+ *genomicstart = left + this->querylength;
+ }
+
+ return;
+}
+#endif
+
+
+Univcoord_T
+Substring_set_unambiguous (double *donor_prob, double *acceptor_prob, Univcoord_T *genomicstart, Univcoord_T *genomicend,
+ T this, int bingoi) {
+
+ debug2(printf("Entered Substring_set_unambiguous\n"));
+
+ this->splicecoord = this->ambcoords[bingoi];
+ this->splicesites_knowni = this->amb_knowni[bingoi];
+ this->nmismatches_whole = this->amb_nmismatches[bingoi];
+ this->chimera_prob = this->amb_probs[bingoi];
+
+ if (this->plusp == true) {
+ this->left = this->splicecoord - this->amb_splice_pos;
+ debug2(printf("left %u\n",this->left));
+ *genomicstart = this->genomicstart = this->left;
+ *genomicend = this->genomicend = this->left + this->querylength;
+ this->alignstart = this->alignstart_trim = this->genomicstart + this->querystart;
+ this->alignend = this->alignend_trim = this->genomicstart + this->queryend;
+ this->nmatches = (this->alignend - this->alignstart) - this->nmismatches_whole;
+
+ debug2(printf("querypos %d..%d, genomiclength is %d, alignstart is %u (%u), alignend is %u (%u), genomicstart is %u, genomicend is %u\n",
+ this->querystart,this->queryend,this->genomiclength,this->alignstart,this->alignstart - this->chroffset,
+ this->alignend,this->alignend - this->chroffset,this->genomicstart,this->genomicend));
+
+ } else {
+ this->left = this->splicecoord - (this->querylength - this->amb_splice_pos);
+ debug2(printf("left %u\n",this->left));
+ *genomicend = this->genomicend = this->left;
+ *genomicstart = this->genomicstart = this->left + this->querylength;
+ this->alignend = this->alignend_trim = this->genomicstart - this->queryend;
+ this->alignstart = this->alignstart_trim = this->genomicstart - this->querystart;
+ this->nmatches = (this->alignstart - this->alignend) - this->nmismatches_whole;
+
+ debug2(printf("querypos %d..%d, genomiclength is %d, alignstart is %u (%u), alignend is %u (%u), genomicstart is %u, genomicend is %u\n",
+ this->querystart,this->queryend,this->genomiclength,this->alignstart,this->alignstart - this->chroffset,
+ this->alignend,this->alignend - this->chroffset,this->genomicstart,this->genomicend));
+ }
+
+ if (this->amb_donor_common_p == true) {
+ *donor_prob = this->amb_common_prob;
+ *acceptor_prob = this->amb_probs[bingoi];
+ } else {
+ *acceptor_prob = this->amb_common_prob;
+ *donor_prob = this->amb_probs[bingoi];
+ }
+
+ this->ambiguous_p = false;
+
+ return this->left;
+}
+
+
/* Look also at Pair_compute_mapq */
float
Substring_compute_mapq (T this, Compress_T query_compress, char *quality_string, bool trim_terminals_p) {
int mapq_start, mapq_end;
+ float best_loglik, loglik;
+ Univcoord_T left, splicecoord;
+ int i;
/* mapq */
mapq_start = this->querystart_orig;
@@ -1884,11 +2143,45 @@ Substring_compute_mapq (T this, Compress_T query_compress, char *quality_string,
if (this->exactp == true) {
/* this->mapq_loglik = MAPQ_loglik_exact(quality_string,0,querylength); */
this->mapq_loglik = 0.0;
+
+ } else if (this->ambiguous_p == true) {
+ if (this->plusp == true) {
+ splicecoord = this->ambcoords[0];
+ left = splicecoord - this->amb_splice_pos;
+ best_loglik = MAPQ_loglik(query_compress,left,mapq_start,mapq_end,
+ this->querylength,quality_string,/*plusp*/true,this->genestrand,this->first_read_p);
+ for (i = 1; i < this->nambcoords; i++) {
+ splicecoord = this->ambcoords[i];
+ left = splicecoord - this->amb_splice_pos;
+ if ((loglik = MAPQ_loglik(query_compress,left,mapq_start,mapq_end,
+ this->querylength,quality_string,/*plusp*/true,this->genestrand,this->first_read_p)) > best_loglik) {
+ best_loglik = loglik;
+ }
+ }
+ } else {
+ splicecoord = this->ambcoords[0];
+ left = splicecoord - (this->querylength - this->amb_splice_pos);
+ best_loglik = MAPQ_loglik(query_compress,left,mapq_start,mapq_end,
+ this->querylength,quality_string,/*plusp*/false,this->genestrand,this->first_read_p);
+ for (i = 1; i < this->nambcoords; i++) {
+ splicecoord = this->ambcoords[i];
+ left = splicecoord - (this->querylength - this->amb_splice_pos);
+ if ((loglik = MAPQ_loglik(query_compress,left,mapq_start,mapq_end,
+ this->querylength,quality_string,/*plusp*/false,this->genestrand,this->first_read_p)) > best_loglik) {
+ best_loglik = loglik;
+ }
+ }
+ }
+
+ this->mapq_loglik = best_loglik;
+
} else {
debug2(printf("trim_left %d, trim_right %d, mapq_start = %d, mapq_end = %d\n",
this->trim_left,this->trim_right,mapq_start,mapq_end));
this->mapq_loglik = MAPQ_loglik(query_compress,this->left,mapq_start,mapq_end,
this->querylength,quality_string,this->plusp,this->genestrand,this->first_read_p);
+ debug2(printf("Substring %u..%u gets loglik %f\n",this->genomicstart - this->chroffset,
+ this->genomicend - this->chroffset,this->mapq_loglik));
}
return this->mapq_loglik;
@@ -1897,51 +2190,56 @@ Substring_compute_mapq (T this, Compress_T query_compress, char *quality_string,
/* Note: query needed only for dibase */
int
-Substring_display_prep (char **deletion, T this, char *query, Compress_T query_compress_fwd, Compress_T query_compress_rev,
- Genome_T genome, int deletion_pos, int deletion_length) {
+Substring_display_prep (T this, char *queryuc_ptr, int querylength,
+ int extraleft, int extraright,
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ Genome_T genome) {
char *genomic_diff;
- unsigned char *gbuffer;
- int mismatch_offset;
+ char *gbuffer;
#ifndef HAVE_ALLOCA
- unsigned char gbuffer_alloc[MAX_READLENGTH/*+MAX_END_DELETIONS*/+1];
+ char gbuffer_alloc[MAX_READLENGTH/*+MAX_END_DELETIONS*/+1];
bool allocp;
#endif
- mismatch_offset = this->alignoffset - this->querystart_orig;
-
/* genomic_bothdiff, genomic_refdiff, and nmismatches_refdiff */
- if (this->exactp == true) {
+ if (0 && this->exactp == true) {
this->genomic_bothdiff = (char *) NULL;
this->genomic_refdiff = (char *) NULL;
this->nmismatches_refdiff = this->nmismatches_whole;
} else if (this->plusp == true) {
+ if (0 && this->exactp == true && extraleft == 0 && extraright == 0) {
+ /* Don't use adj here */
+ /* genomic_diff = &(queryuc_ptr[0]); */
+ this->genomic_refdiff = (char *) NULL;
+ this->nmismatches_refdiff = 0;
+
+ } else {
#ifdef HAVE_ALLOCA
- gbuffer = (unsigned char *) ALLOCA((this->genomiclength+1) * sizeof(unsigned char));
+ gbuffer = (char *) ALLOCA((this->genomiclength+1) * sizeof(char));
#else
- if (this->genomiclength < MAX_READLENGTH) {
- gbuffer = gbuffer_alloc;
- allocp = false;
- } else {
- gbuffer = (unsigned char *) MALLOC((this->genomiclength+1) * sizeof(unsigned char));
- allocp = true;
- }
+ if (this->genomiclength < MAX_READLENGTH) {
+ gbuffer = gbuffer_alloc;
+ allocp = false;
+ } else {
+ gbuffer = (char *) MALLOC((this->genomiclength+1) * sizeof(char));
+ allocp = true;
+ }
#endif
- Genome_fill_buffer_simple(genome,this->left_genomicseg,this->genomiclength,gbuffer);
- if (deletion_pos >= 0) {
- *deletion = (char *) CALLOC_OUT(deletion_length+1,sizeof(char));
- strncpy(*deletion,&(gbuffer[deletion_pos]),deletion_length);
- }
- genomic_diff = gbuffer;
+ debug1(printf("Obtaining genomic_diff from left %u (%u) for querylength %d\n",
+ this->left,this->left - this->chroffset,querylength));
+ Genome_fill_buffer_simple(genome,this->left,querylength,gbuffer);
+ genomic_diff = gbuffer;
- Genome_mark_mismatches(genomic_diff,this->querylength,query_compress_fwd,
- this->left,/*pos5*/this->querystart_orig,/*pos3*/this->queryend_orig,
- mismatch_offset,/*plusp*/true,this->genestrand,this->first_read_p);
+ Genome_mark_mismatches(genomic_diff,querylength,query_compress_fwd,
+ this->left,/*pos5*/this->querystart,/*pos3*/this->queryend,
+ /*plusp*/true,this->genestrand,this->first_read_p);
- this->genomic_bothdiff = embellish_genomic(genomic_diff,query,this->querystart_orig,this->queryend_orig,
- this->querylength,this->alignoffset,this->extraleft,this->extraright,
- this->genestrand);
+ }
+ /* Need to perform embellish to put dashes in */
+ this->genomic_bothdiff = embellish_genomic(genomic_diff,queryuc_ptr,this->querystart,this->queryend,
+ querylength,extraleft,extraright,this->genestrand);
if (snps_iit == NULL) {
this->genomic_refdiff = this->genomic_bothdiff;
@@ -1953,96 +2251,106 @@ Substring_display_prep (char **deletion, T this, char *query, Compress_T query_c
/*pos5*/this->alignstart_trim - this->left,
/*pos3*/this->alignend_trim - this->left,
/*plusp*/true,this->genestrand,this->first_read_p);
-
- Genome_mark_mismatches_ref(genomic_diff,this->querylength,query_compress_fwd,this->left,
- /*pos5*/this->querystart_orig,/*pos3*/this->queryend_orig,
- mismatch_offset,/*plusp*/true,this->genestrand,this->first_read_p);
+
+ Genome_mark_mismatches_ref(genomic_diff,querylength,query_compress_fwd,this->left,
+ /*pos5*/this->querystart,/*pos3*/this->queryend,
+ /*plusp*/true,this->genestrand,this->first_read_p);
if (output_sam_p == false) {
- this->genomic_refdiff = embellish_genomic(genomic_diff,query,this->querystart_orig,this->queryend_orig,
- this->querylength,this->alignoffset,this->extraleft,this->extraright,
- this->genestrand);
+ this->genomic_refdiff = embellish_genomic(genomic_diff,queryuc_ptr,this->querystart,this->queryend,
+ querylength,extraleft,extraright,this->genestrand);
}
}
if (output_sam_p == true) {
- this->genomic_refdiff = embellish_genomic_sam(genomic_diff,query,this->querystart_orig,this->queryend_orig,
- this->querylength,this->genomiclength,this->alignoffset,
- this->genestrand);
+ this->genomic_refdiff = embellish_genomic_sam(genomic_diff,queryuc_ptr,this->querystart,this->queryend,
+ querylength,this->genomiclength,
+ this->genestrand,this->exactp);
}
+ if (0 && this->exactp == true && extraleft == 0 && extraright == 0) {
+ } else {
#ifdef HAVE_ALLOCA
- FREEA(gbuffer);
+ FREEA(gbuffer);
#else
- if (allocp == true) {
- FREE(gbuffer);
- }
+ if (allocp == true) {
+ FREE(gbuffer);
+ }
#endif
+ }
} else {
+ if (0 && this->exactp == true && extraleft == 0 && extraright == 0) {
+ /* Don't use adj here */
+ /* genomic_diff = &(queryuc_ptr[0]); -- Not queryrc */
+ this->genomic_refdiff = (char *) NULL;
+ this->nmismatches_refdiff = 0;
+
+ } else {
#ifdef HAVE_ALLOCA
- gbuffer = (unsigned char *) ALLOCA((this->genomiclength+1) * sizeof(unsigned char));
+ gbuffer = (char *) ALLOCA((this->genomiclength+1) * sizeof(char));
#else
- if (this->genomiclength < MAX_READLENGTH) {
- gbuffer = gbuffer_alloc;
- allocp = false;
- } else {
- gbuffer = (unsigned char *) MALLOC((this->genomiclength+1) * sizeof(unsigned char));
- allocp = true;
- }
+ if (this->genomiclength < MAX_READLENGTH) {
+ gbuffer = gbuffer_alloc;
+ allocp = false;
+ } else {
+ gbuffer = (char *) MALLOC((this->genomiclength+1) * sizeof(char));
+ allocp = true;
+ }
#endif
- Genome_fill_buffer_simple(genome,this->left_genomicseg,this->genomiclength,gbuffer);
- genomic_diff = make_complement_inplace(gbuffer,this->genomiclength);
- if (deletion_pos >= 0) {
- *deletion = (char *) CALLOC_OUT(deletion_length+1,sizeof(char));
- strncpy(*deletion,&(gbuffer[deletion_pos]),deletion_length);
- }
+ debug1(printf("Obtaining genomic_diff from left %u (%u) for querylength %d, and complemented\n",
+ this->left,this->left - this->chroffset,querylength));
+ Genome_fill_buffer_simple(genome,this->left,querylength,gbuffer);
+ genomic_diff = make_complement_inplace(gbuffer,querylength);
- Genome_mark_mismatches(genomic_diff,this->querylength,query_compress_rev,
- this->left,/*pos5*/this->querylength - this->queryend_orig,
- /*pos3*/this->querylength - this->querystart_orig,
- mismatch_offset,/*plusp*/false,this->genestrand,this->first_read_p);
+ Genome_mark_mismatches(genomic_diff,querylength,query_compress_rev,
+ this->left,/*pos5*/querylength - this->queryend,
+ /*pos3*/querylength - this->querystart,
+ /*plusp*/false,this->genestrand,this->first_read_p);
+ }
- this->genomic_bothdiff = embellish_genomic(genomic_diff,query,this->querystart_orig,this->queryend_orig,
- this->querylength,this->alignoffset,this->extraleft,this->extraright,
- this->genestrand);
+ /* Need to perform embellish to put dashes in */
+ this->genomic_bothdiff = embellish_genomic(genomic_diff,/*not queryrc*/queryuc_ptr,this->querystart,this->queryend,
+ querylength,extraleft,extraright,this->genestrand);
if (snps_iit == NULL) {
this->genomic_refdiff = this->genomic_bothdiff;
this->nmismatches_refdiff = this->nmismatches_bothdiff;
-
+
} else {
this->nmismatches_refdiff =
Genome_count_mismatches_substring_ref(query_compress_rev,this->left,
/*pos5*/this->alignend_trim - this->left,
/*pos3*/this->alignstart_trim - this->left,/*plusp*/false,
this->genestrand,this->first_read_p);
-
- Genome_mark_mismatches_ref(genomic_diff,this->querylength,query_compress_rev,this->left,
- /*pos5*/this->querylength - this->queryend_orig,
- /*pos3*/this->querylength - this->querystart_orig,
- mismatch_offset,/*plusp*/false,this->genestrand,this->first_read_p);
-
+
+ Genome_mark_mismatches_ref(genomic_diff,querylength,query_compress_rev,this->left,
+ /*pos5*/querylength - this->queryend,
+ /*pos3*/querylength - this->querystart,
+ /*plusp*/false,this->genestrand,this->first_read_p);
+
if (output_sam_p == false) {
- this->genomic_refdiff = embellish_genomic(genomic_diff,query,this->querystart_orig,this->queryend_orig,
- this->querylength,this->alignoffset,this->extraleft,this->extraright,
- this->genestrand);
+ this->genomic_refdiff = embellish_genomic(genomic_diff,/*not queryrc*/queryuc_ptr,this->querystart,this->queryend,
+ querylength,extraleft,extraright,this->genestrand);
}
}
if (output_sam_p == true) {
- this->genomic_refdiff = embellish_genomic_sam(genomic_diff,query,this->querystart_orig,this->queryend_orig,
- this->querylength,this->genomiclength,this->alignoffset,
- this->genestrand);
+ this->genomic_refdiff = embellish_genomic_sam(genomic_diff,/*not queryrc*/queryuc_ptr,this->querystart,this->queryend,
+ querylength,this->genomiclength,
+ this->genestrand,this->exactp);
}
+ if (0 && this->exactp == true && extraleft == 0 && extraright == 0) {
+ } else {
#ifdef HAVE_ALLOCA
- FREEA(gbuffer);
+ FREEA(gbuffer);
#else
- if (allocp == true) {
- FREE(gbuffer);
- }
+ if (allocp == true) {
+ FREE(gbuffer);
+ }
#endif
+ }
}
return this->nmismatches_refdiff;
@@ -2050,6 +2358,11 @@ Substring_display_prep (char **deletion, T this, char *query, Compress_T query_c
Univcoord_T
+Substring_left (T this) {
+ return this->left;
+}
+
+Univcoord_T
Substring_splicecoord (T this) {
return this->splicecoord;
}
@@ -2214,6 +2527,16 @@ Substring_match_length (T this) {
}
}
+int
+Substring_match_length_amb (T this) {
+ if (this->ambiguous_p == false) {
+ return 0;
+ } else {
+ return this->queryend - this->querystart;
+ }
+}
+
+
/* Before trimming */
int
Substring_match_length_orig (T this) {
@@ -2224,7 +2547,6 @@ Substring_match_length_orig (T this) {
}
}
-/* Used only by Goby */
Chrpos_T
Substring_genomic_alignment_length (T this) {
if (this == NULL) {
@@ -2267,6 +2589,26 @@ Substring_alignend (T this) {
return this->alignend;
}
+Chrpos_T
+Substring_alignstart_chr (T this) {
+ return this->alignstart - this->chroffset;
+}
+
+Chrpos_T
+Substring_alignend_chr (T this) {
+ return this->alignend - this->chroffset;
+}
+
+Chrpos_T
+Substring_alignstart_trim_chr (T this) {
+ return this->alignstart_trim - this->chroffset;
+}
+
+Chrpos_T
+Substring_alignend_trim_chr (T this) {
+ return this->alignend_trim - this->chroffset;
+}
+
Univcoord_T
Substring_alignstart_trim (T this) {
return this->alignstart_trim;
@@ -2292,7 +2634,7 @@ Substring_alignmid_trim (T this) {
Univcoord_T
Substring_left_genomicseg (T this) {
- return this->left_genomicseg;
+ return this->left;
}
Univcoord_T
@@ -2300,9 +2642,9 @@ Substring_genomicstart (T this) {
return this->genomicstart;
}
-Univcoord_T
-Substring_genomicstart_adj (T this) {
- return this->genomicstart_adj;
+Chrpos_T
+Substring_genomicstart_chr (T this) {
+ return this->genomicstart - this->chroffset;
}
Univcoord_T
@@ -2311,21 +2653,64 @@ Substring_genomicend (T this) {
}
Chrpos_T
+Substring_genomicend_chr (T this) {
+ return this->genomicend - this->chroffset;
+}
+
+Chrpos_T
Substring_genomiclength (T this) {
return this->genomiclength;
}
-Chrpos_T
-Substring_alignstart_chr (T this) {
- return this->alignstart - this->chroffset;
+double
+Substring_amb_donor_prob (T this) {
+ double max;
+ int i;
+
+ if (this->amb_donor_common_p == true) {
+ return this->amb_common_prob;
+ } else {
+ max = this->amb_probs[0];
+ for (i = 1; i < this->nambcoords; i++) {
+ if (this->amb_probs[i] > max) {
+ max = this->amb_probs[i];
+ }
+ }
+ return max;
+ }
}
-Chrpos_T
-Substring_alignend_chr (T this) {
- return this->alignend - this->chroffset;
+double
+Substring_amb_acceptor_prob (T this) {
+ double max;
+ int i;
+
+ if (this->amb_donor_common_p == true) {
+ max = this->amb_probs[0];
+ for (i = 1; i < this->nambcoords; i++) {
+ if (this->amb_probs[i] > max) {
+ max = this->amb_probs[i];
+ }
+ }
+ return max;
+ } else {
+ return this->amb_common_prob;
+ }
}
+
+double
+Substring_siteA_prob (T this) {
+ return this->siteA_prob;
+}
+
+double
+Substring_siteD_prob (T this) {
+ return this->siteD_prob;
+}
+
+
double
Substring_chimera_prob (T this) {
return this->chimera_prob;
@@ -2379,20 +2764,47 @@ Substring_nchimera_novel (T this) {
int
Substring_chimera_sensedir (T this) {
- if (this->chimera_sensep == true) {
- return SENSE_FORWARD;
- } else {
- return SENSE_ANTI;
- }
+ return this->chimera_sensedir;
}
bool
-Substring_chimera_sensep (T this) {
- return this->chimera_sensep;
+Substring_ambiguous_p (T this) {
+ assert(this->ambiguous_p == false || this->ambiguous_p == true);
+ return this->ambiguous_p;
}
-/* circularpos measures query distance from SAM chrlow to origin */
+int
+Substring_nambcoords (T this) {
+ return this->nambcoords;
+}
+
+Univcoord_T *
+Substring_ambcoords (T this) {
+ return this->ambcoords;
+}
+
+int *
+Substring_amb_knowni (T this) {
+ return this->amb_knowni;
+}
+
+int *
+Substring_amb_nmismatches (T this) {
+ return this->amb_nmismatches;
+}
+
+double *
+Substring_amb_probs (T this) {
+ return this->amb_probs;
+}
+
+
+
+
+
+
+/* circularpos measures query distance from SAM chrlow to origin */
int
Substring_circularpos (T this) {
if (this == NULL) {
@@ -2426,9 +2838,10 @@ Substring_copy (T old) {
T new;
if (old == NULL) {
- return NULL;
+ return (T) NULL;
} else {
new = (T) MALLOC_OUT(sizeof(*new));
+ debug2(printf("substring %p is a copy of %p\n",new,old));
new->exactp = old->exactp;
new->nmismatches_whole = old->nmismatches_whole;
@@ -2446,12 +2859,9 @@ Substring_copy (T old) {
new->chrhigh = old->chrhigh;
new->chrlength = old->chrlength;
- new->left_genomicseg = old->left_genomicseg;
new->left = old->left;
new->genomicstart = old->genomicstart;
new->genomicend = old->genomicend;
- new->genomicstart_adj = old->genomicstart_adj;
- new->genomicend_adj = old->genomicend_adj;
new->start_endtype = old->start_endtype;
new->end_endtype = old->end_endtype;
@@ -2460,6 +2870,7 @@ Substring_copy (T old) {
new->queryend_orig = old->queryend_orig;
new->querystart = old->querystart;
new->queryend = old->queryend;
+ new->amb_splice_pos = old->amb_splice_pos;
new->querylength = old->querylength;
new->alignstart = old->alignstart;
@@ -2468,10 +2879,6 @@ Substring_copy (T old) {
new->alignstart_trim = old->alignstart_trim;
new->alignend_trim = old->alignend_trim;
- new->alignoffset = old->alignoffset;
- new->extraleft = old->extraleft;
- new->extraright = old->extraright;
-
new->genomiclength = old->genomiclength;
new->plusp = old->plusp;
new->genestrand = old->genestrand;
@@ -2493,7 +2900,7 @@ Substring_copy (T old) {
new->mapq_loglik = old->mapq_loglik;
- new->chimera_sensep = old->chimera_sensep;
+ new->chimera_sensedir = old->chimera_sensedir;
new->splicecoord = old->splicecoord;
new->splicesites_knowni = old->splicesites_knowni;
@@ -2511,19 +2918,209 @@ Substring_copy (T old) {
new->chimera_pos_2 = old->chimera_pos_2;
new->chimera_prob_2 = old->chimera_prob_2;
+ new->ambiguous_p = old->ambiguous_p;
+ if (old->nambcoords == 0) {
+ new->nambcoords = 0;
+ new->ambcoords = (Univcoord_T *) NULL;
+ new->amb_knowni = (int *) NULL;
+ new->amb_nmismatches = (int *) NULL;
+ new->amb_probs = (double *) NULL;
+ new->amb_common_prob = 0.0;
+ new->amb_donor_common_p = false;
+ } else {
+ new->nambcoords = old->nambcoords;
+ new->ambcoords = (Univcoord_T *) MALLOC(old->nambcoords * sizeof(Univcoord_T));
+ new->amb_knowni = (int *) MALLOC(old->nambcoords * sizeof(int));
+ new->amb_nmismatches = (int *) MALLOC(old->nambcoords * sizeof(int));
+ new->amb_probs = (double *) MALLOC(old->nambcoords * sizeof(double));
+ new->amb_common_prob = old->amb_common_prob;
+ new->amb_donor_common_p = old->amb_donor_common_p;
+
+ memcpy(new->ambcoords,old->ambcoords,old->nambcoords * sizeof(Univcoord_T));
+ memcpy(new->amb_knowni,old->amb_knowni,old->nambcoords * sizeof(int));
+ memcpy(new->amb_nmismatches,old->amb_nmismatches,old->nambcoords * sizeof(int));
+ memcpy(new->amb_probs,old->amb_probs,old->nambcoords * sizeof(double));
+ }
+
return new;
}
}
+/* Treat as a sense donor, on either plus or minus strand */
+T
+Substring_new_startfrag (Univcoord_T startfrag_coord, int splice_pos, int nmismatches,
+ Univcoord_T left, Compress_T query_compress,
+ int querylength, bool plusp, int genestrand, bool first_read_p,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength) {
+ T new;
+ int querystart, queryend;
+ Univcoord_T genomicstart, alignstart, alignend;
+ Endtype_T start_endtype, end_endtype;
+ bool trim_left_p, trim_right_p;
+
+ /* Previously checked if left >= chroffset + chrlength to exclude
+ the duplicate length, but now excluding all translocations to
+ circular chromosomes */
+
+ if (chroffset + chrlength < chrhigh) {
+ /* Don't splice to circular chromosomes */
+ return (T) NULL;
+
+ } else if (plusp == true) {
+ genomicstart = left;
+ /* genomicend = left + querylength; */
+
+ start_endtype = END;
+ end_endtype = FRAG;
+
+ querystart = 0;
+ queryend = splice_pos;
+ alignstart = genomicstart;
+ alignend = genomicstart + splice_pos;
+ trim_left_p = true; /* querystart == 0 */
+ trim_right_p = false;
+
+ } else {
+ genomicstart = left + querylength;
+ /* genomicend = left; */
+
+ start_endtype = END;
+ end_endtype = FRAG;
+
+ querystart = 0;
+ queryend = querylength - splice_pos;
+ alignstart = genomicstart;
+ alignend = genomicstart - (querylength - splice_pos);
+ trim_left_p = true; /* querystart == 0 */
+ trim_right_p = false;
+ }
+
+ if ((new = Substring_new(nmismatches,chrnum,chroffset,chrhigh,chrlength,
+ query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
+ alignstart,alignend,/*genomiclength*/querylength,
+ /*exactp*/false,plusp,genestrand,first_read_p,
+ trim_left_p,trim_right_p,/*outofbounds_start*/0,/*outofbounds_end*/0,
+ /*minlength*/0)) == NULL) {
+ return (T) NULL;
+ }
+
+ debug2(printf("Making new startfrag with coord %u and left %u, plusp %d, query %d..%d, genome %u..%u\n",
+ startfrag_coord,left,plusp,querystart,queryend,alignstart - chroffset,alignend - chroffset));
+ new->splicecoord = startfrag_coord;
+ new->splicesites_knowni = -1;
+
+ new->chimera_modelpos = left + splice_pos;
+ assert(new->splicecoord == new->chimera_modelpos);
+ new->chimera_sensedir = SENSE_NULL;
+ /* new->chimera_knownp = false; */
+ new->chimera_novelp = true;
+
+ if (plusp == true) {
+ new->chimera_pos = splice_pos;
+ } else {
+ new->chimera_pos = querylength - splice_pos;
+ }
+ new->chimera_prob = 0.0;
+
+ new->siteA_prob = 0.0;
+ new->siteD_prob = 0.0;
+
+ return new;
+}
+
+
+/* Treat as a sense acceptor, on either plus or minus strand */
+T
+Substring_new_endfrag (Univcoord_T endfrag_coord, int splice_pos, int nmismatches,
+ Univcoord_T left, Compress_T query_compress,
+ int querylength, bool plusp, int genestrand, bool first_read_p,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength) {
+ T new;
+ int querystart, queryend;
+ Univcoord_T genomicstart, alignstart, alignend;
+ Endtype_T start_endtype, end_endtype;
+ bool trim_left_p, trim_right_p;
+
+ /* Previously checked if left >= chroffset + chrlength to exclude
+ the duplicate length, but now excluding all translocations to
+ circular chromosomes */
+
+ if (chroffset + chrlength < chrhigh) {
+ /* Don't splice to circular chromosomes */
+ return (T) NULL;
+
+ } else if (plusp == true) {
+ genomicstart = left;
+ /* genomicend = left + querylength; */
+
+ start_endtype = FRAG;
+ end_endtype = END;
+
+ querystart = splice_pos;
+ queryend = querylength;
+ alignstart = genomicstart + splice_pos;
+ alignend = genomicstart + querylength;
+ trim_left_p = false;
+ trim_right_p = true; /* queryend == querylength */
+
+ } else {
+ genomicstart = left + querylength;
+ /* genomicend = left; */
+
+ start_endtype = FRAG;
+ end_endtype = END;
+
+ querystart = querylength - splice_pos;
+ queryend = querylength;
+ alignstart = left + splice_pos;
+ alignend = left;
+ trim_left_p = false;
+ trim_right_p = true; /* queryend == querylength */
+ }
+
+ if ((new = Substring_new(nmismatches,chrnum,chroffset,chrhigh,chrlength,
+ query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
+ alignstart,alignend,/*genomiclength*/querylength,
+ /*exactp*/false,plusp,genestrand,first_read_p,
+ trim_left_p,trim_right_p,/*outofbounds_start*/0,/*outofbounds_end*/0,
+ /*minlength*/0)) == NULL) {
+ return (T) NULL;
+ }
+
+ debug2(printf("Making new endfrag with coord %u and left %u, plusp %d, query %d..%d, genome %u..%u\n",
+ endfrag_coord,left,plusp,querystart,queryend,alignstart - chroffset,alignend - chroffset));
+ new->splicecoord = endfrag_coord;
+ new->splicesites_knowni = -1;
+
+ new->chimera_modelpos = left + splice_pos;
+ assert(new->splicecoord == new->chimera_modelpos);
+ new->chimera_sensedir = SENSE_NULL;
+ /* new->chimera_knownp = false; */
+ new->chimera_novelp = true;
+
+ if (plusp == true) {
+ new->chimera_pos = splice_pos;
+ } else {
+ new->chimera_pos = querylength - splice_pos;
+ }
+ new->chimera_prob = 0.0;
+
+ new->siteA_prob = 0.0;
+ new->siteD_prob = 0.0;
+
+ return new;
+}
+
+
T
Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, int donor_nmismatches,
double donor_prob, Univcoord_T left, Compress_T query_compress,
- int querylength, bool plusp, int genestrand, bool first_read_p, bool sensep,
+ int querylength, bool plusp, int genestrand, bool first_read_p, int sensedir,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength) {
T new;
- int querystart, queryend, extraleft, extraright;
+ int querystart, queryend;
Univcoord_T genomicstart, genomicend, alignstart, alignend;
Endtype_T start_endtype, end_endtype;
bool trim_left_p, trim_right_p;
@@ -2539,22 +3136,18 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
} else if (plusp == true) {
genomicstart = left;
genomicend = left + querylength;
- if (sensep == true) {
+ if (sensedir == SENSE_FORWARD) {
start_endtype = END;
end_endtype = DON;
querystart = 0;
queryend = donor_pos;
- extraleft = 0;
- extraright = 2;
alignstart = genomicstart;
alignend = genomicstart + donor_pos;
trim_left_p = true; /* querystart == 0 */
trim_right_p = false;
- } else {
- extraleft = 2;
- extraright = 0;
+ } else if (sensedir == SENSE_ANTI) {
start_endtype = DON;
end_endtype = END;
@@ -2564,27 +3157,26 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
alignend = genomicend;
trim_left_p = false;
trim_right_p = true; /* queryend == querylength */
+
+ } else {
+ abort();
}
} else {
genomicstart = left + querylength;
genomicend = left;
- if (sensep == true) {
+ if (sensedir == SENSE_FORWARD) {
start_endtype = END;
end_endtype = DON;
querystart = 0;
queryend = querylength - donor_pos;
- extraleft = 0;
- extraright = 2;
alignstart = genomicstart;
alignend = genomicstart - (querylength - donor_pos);
trim_left_p = true; /* querystart == 0 */
trim_right_p = false;
- } else {
- extraleft = 2;
- extraright = 0;
+ } else if (sensedir == SENSE_ANTI) {
start_endtype = DON;
end_endtype = END;
@@ -2594,26 +3186,29 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
alignend = genomicend;
trim_left_p = false;
trim_right_p = true; /* queryend == querylength */
+
+ } else {
+ abort();
}
}
- if ((new = Substring_new(donor_nmismatches,chrnum,chroffset,chrhigh,chrlength,left,
- genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ if ((new = Substring_new(donor_nmismatches,chrnum,chroffset,chrhigh,chrlength,
query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
alignstart,alignend,/*genomiclength*/querylength,
- extraleft,extraright,/*exactp*/false,plusp,genestrand,first_read_p,
- trim_left_p,trim_right_p,/*minlength*/0)) == NULL) {
+ /*exactp*/false,plusp,genestrand,first_read_p,
+ trim_left_p,trim_right_p,/*outofbounds_start*/0,/*outofbounds_end*/0,
+ /*minlength*/0)) == NULL) {
return (T) NULL;
}
- debug2(printf("Making new donor with splicesites_i %d, coord %u and left %u, plusp %d, sensep %d, query %d..%d, genome %u..%u\n",
- donor_knowni,donor_coord,left,plusp,sensep,querystart,queryend,alignstart - chroffset,alignend - chroffset));
+ debug2(printf("Making new donor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, genome %u..%u\n",
+ donor_knowni,donor_coord,left,plusp,sensedir,querystart,queryend,alignstart - chroffset,alignend - chroffset));
new->splicecoord = donor_coord;
new->splicesites_knowni = donor_knowni;
new->chimera_modelpos = left + donor_pos;
assert(new->splicecoord == new->chimera_modelpos);
- new->chimera_sensep = sensep;
+ new->chimera_sensedir = sensedir;
if (donor_knowni >= 0) {
new->chimera_knownp = true;
/* new->chimera_novelp = false */
@@ -2629,6 +3224,9 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
}
new->chimera_prob = donor_prob;
+ new->siteA_prob = 0.0;
+ new->siteD_prob = donor_prob;
+
return new;
}
@@ -2636,10 +3234,10 @@ Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, i
T
Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos, int acceptor_nmismatches,
double acceptor_prob, Univcoord_T left, Compress_T query_compress,
- int querylength, bool plusp, int genestrand, bool first_read_p, bool sensep,
+ int querylength, bool plusp, int genestrand, bool first_read_p, int sensedir,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength) {
T new;
- int querystart, queryend, extraleft, extraright;
+ int querystart, queryend;
Univcoord_T genomicstart, genomicend, alignstart, alignend;
Endtype_T start_endtype, end_endtype;
bool trim_left_p, trim_right_p;
@@ -2655,9 +3253,7 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
} else if (plusp == true) {
genomicstart = left;
genomicend = left + querylength;
- if (sensep == true) {
- extraleft = 2;
- extraright = 0;
+ if (sensedir == SENSE_FORWARD) {
start_endtype = ACC;
end_endtype = END;
@@ -2668,9 +3264,7 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
trim_left_p = false;
trim_right_p = true; /* queryend == querylength */
- } else {
- extraleft = 0;
- extraright = 2;
+ } else if (sensedir == SENSE_ANTI) {
start_endtype = END;
end_endtype = ACC;
@@ -2680,14 +3274,15 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
alignend = genomicstart + acceptor_pos;
trim_left_p = true; /* querystart == 0 */
trim_right_p = false;
+
+ } else {
+ abort();
}
} else {
genomicstart = left + querylength;
genomicend = left;
- if (sensep == true) {
- extraleft = 2;
- extraright = 0;
+ if (sensedir == SENSE_FORWARD) {
start_endtype = ACC;
end_endtype = END;
@@ -2698,9 +3293,7 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
trim_left_p = false;
trim_right_p = true; /* queryend == querylength */
- } else {
- extraleft = 0;
- extraright = 2;
+ } else if (sensedir == SENSE_ANTI) {
start_endtype = END;
end_endtype = ACC;
@@ -2710,26 +3303,29 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
alignend = genomicstart - (querylength - acceptor_pos);
trim_left_p = true; /* querystart == 0 */
trim_right_p = false;
+
+ } else {
+ abort();
}
}
- if ((new = Substring_new(acceptor_nmismatches,chrnum,chroffset,chrhigh,chrlength,left,
- genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ if ((new = Substring_new(acceptor_nmismatches,chrnum,chroffset,chrhigh,chrlength,
query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
alignstart,alignend,/*genomiclength*/querylength,
- extraleft,extraright,/*exactp*/false,plusp,genestrand,first_read_p,
- trim_left_p,trim_right_p,/*minlength*/0)) == NULL) {
+ /*exactp*/false,plusp,genestrand,first_read_p,
+ trim_left_p,trim_right_p,/*outofbounds_start*/0,/*outofbounds_end*/0,
+ /*minlength*/0)) == NULL) {
return (T) NULL;
}
- debug2(printf("Making new acceptor with splicesites_i %d, coord %u and left %u, plusp %d, sensep %d, query %d..%d, genome %u..%u\n",
- acceptor_knowni,acceptor_coord,left,plusp,sensep,querystart,queryend,alignstart - chroffset,alignend - chroffset));
+ debug2(printf("Making new acceptor with splicesites_i %d, coord %u and left %u, plusp %d, sensedir %d, query %d..%d, genome %u..%u\n",
+ acceptor_knowni,acceptor_coord,left,plusp,sensedir,querystart,queryend,alignstart - chroffset,alignend - chroffset));
new->splicecoord = acceptor_coord;
new->splicesites_knowni = acceptor_knowni;
new->chimera_modelpos = left + acceptor_pos;
assert(new->splicecoord == new->chimera_modelpos);
- new->chimera_sensep = sensep;
+ new->chimera_sensedir = sensedir;
if (acceptor_knowni >= 0) {
new->chimera_knownp = true;
/* new->chimera_novelp = false */
@@ -2745,6 +3341,9 @@ Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acc
}
new->chimera_prob = acceptor_prob;
+ new->siteA_prob = acceptor_prob;
+ new->siteD_prob = 0.0;
+
return new;
}
@@ -2755,60 +3354,67 @@ Substring_new_shortexon (Univcoord_T acceptor_coord, int acceptor_knowni, Univco
int acceptor_pos, int donor_pos, int nmismatches,
double acceptor_prob, double donor_prob, Univcoord_T left,
Compress_T query_compress, int querylength,
- bool plusp, int genestrand, bool first_read_p, bool sensep,
+ bool plusp, int genestrand, bool first_read_p, int sensedir,
bool acceptor_ambp, bool donor_ambp,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength) {
T new;
int querystart, queryend;
- Univcoord_T genomicstart, genomicend, alignstart, alignend;
+ Univcoord_T genomicstart, alignstart, alignend;
Endtype_T start_endtype, end_endtype;
if (plusp == true) {
genomicstart = left;
- genomicend = left + querylength;
- if (sensep == true) {
+ /* genomicend = left + querylength; */
+ if (sensedir == SENSE_FORWARD) {
start_endtype = (acceptor_ambp == true) ? AMB_ACC : ACC;
end_endtype = (donor_ambp == true) ? AMB_DON : DON;
querystart = acceptor_pos;
queryend = donor_pos;
alignstart = genomicstart + acceptor_pos;
alignend = genomicstart + donor_pos;
- } else {
+
+ } else if (sensedir == SENSE_ANTI) {
start_endtype = (donor_ambp == true) ? AMB_DON : DON;
end_endtype = (acceptor_ambp == true) ? AMB_ACC : ACC;
querystart = donor_pos;
queryend = acceptor_pos;
alignstart = genomicstart + donor_pos;
alignend = genomicstart + acceptor_pos;
+
+ } else {
+ abort();
}
} else {
genomicstart = left + querylength;
- genomicend = left;
- if (sensep == true) {
+ /* genomicend = left; */
+ if (sensedir == SENSE_FORWARD) {
start_endtype = (acceptor_ambp == true) ? AMB_ACC : ACC;
end_endtype = (donor_ambp == true) ? AMB_DON : DON;
querystart = querylength - acceptor_pos;
queryend = querylength - donor_pos;
alignstart = genomicstart - (querylength - acceptor_pos);
alignend = genomicstart - (querylength - donor_pos);
- } else {
+
+ } else if (sensedir == SENSE_ANTI) {
start_endtype = (donor_ambp == true) ? AMB_DON : DON;
end_endtype = (acceptor_ambp == true) ? AMB_ACC : ACC;
querystart = querylength - donor_pos;
queryend = querylength - acceptor_pos;
alignstart = genomicstart - (querylength - donor_pos);
alignend = genomicstart - (querylength - acceptor_pos);
+
+ } else {
+ abort();
}
}
- if ((new = Substring_new(nmismatches,chrnum,chroffset,chrhigh,chrlength,left,
- genomicstart,genomicend,/*genomicstart_adj*/genomicstart,/*genomicend_adj*/genomicend,
+ if ((new = Substring_new(nmismatches,chrnum,chroffset,chrhigh,chrlength,
query_compress,start_endtype,end_endtype,querystart,queryend,querylength,
alignstart,alignend,/*genomiclength*/querylength,
- /*extraleft*/2,/*extraright*/2,/*exactp*/false,plusp,genestrand,first_read_p,
+ /*exactp*/false,plusp,genestrand,first_read_p,
/*trim_left_p*/false,/*trim_right_p*/false,
- /*minlength*/0)) == NULL) {
+ /*outofbounds_start*/0,/*outofbounds_end*/0,/*minlength*/0)) == NULL) {
return (T) NULL;
}
@@ -2820,7 +3426,7 @@ Substring_new_shortexon (Univcoord_T acceptor_coord, int acceptor_knowni, Univco
new->chimera_modelpos = left + acceptor_pos;
new->chimera_modelpos_2 = left + donor_pos;
- new->chimera_sensep = sensep;
+ new->chimera_sensedir = sensedir;
if (acceptor_knowni >= 0) {
new->chimera_knownp = true;
@@ -2849,6 +3455,9 @@ Substring_new_shortexon (Univcoord_T acceptor_coord, int acceptor_knowni, Univco
new->chimera_prob = acceptor_prob;
new->chimera_prob_2 = donor_prob;
+ new->siteA_prob = acceptor_prob;
+ new->siteD_prob = donor_prob;
+
return new;
}
@@ -2859,14 +3468,27 @@ Substring_assign_donor_prob (T donor) {
if (donor == NULL) {
return;
- }
- if (donor->chimera_knownp == false) {
+ } else if (donor->chimera_knownp == false) {
/* Prob already assigned */
- } else if (donor->plusp == donor->chimera_sensep) {
- donor->chimera_prob = Maxent_hr_donor_prob(donor->chimera_modelpos,donor->chroffset);
+
+ } else if (donor->chimera_sensedir == SENSE_FORWARD) {
+ if (donor->plusp == true) {
+ donor->chimera_prob = Maxent_hr_donor_prob(donor->chimera_modelpos,donor->chroffset);
+ } else {
+ donor->chimera_prob = Maxent_hr_antidonor_prob(donor->chimera_modelpos,donor->chroffset);
+ }
+
+ } else if (donor->chimera_sensedir == SENSE_ANTI) {
+ if (donor->plusp == true) {
+ donor->chimera_prob = Maxent_hr_antidonor_prob(donor->chimera_modelpos,donor->chroffset);
+ } else {
+ donor->chimera_prob = Maxent_hr_donor_prob(donor->chimera_modelpos,donor->chroffset);
+ }
+
} else {
- donor->chimera_prob = Maxent_hr_antidonor_prob(donor->chimera_modelpos,donor->chroffset);
+ /* SENSE_NULL */
+ donor->chimera_prob = 0.0;
}
return;
@@ -2877,14 +3499,27 @@ Substring_assign_acceptor_prob (T acceptor) {
if (acceptor == NULL) {
return;
- }
- if (acceptor->chimera_knownp == false) {
+ } else if (acceptor->chimera_knownp == false) {
/* Prob already assigned */
- } else if (acceptor->plusp == acceptor->chimera_sensep) {
- acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+
+ } else if (acceptor->chimera_sensedir == SENSE_FORWARD) {
+ if (acceptor->plusp == true) {
+ acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+ } else {
+ acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+ }
+
+ } else if (acceptor->chimera_sensedir == SENSE_ANTI) {
+ if (acceptor->plusp == true) {
+ acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+ } else {
+ acceptor->chimera_prob = Maxent_hr_acceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+ }
+
} else {
- acceptor->chimera_prob = Maxent_hr_antiacceptor_prob(acceptor->chimera_modelpos,acceptor->chroffset);
+ /* SENSE_NULL */
+ acceptor->chimera_prob = 0.0;
}
return;
@@ -2896,19 +3531,44 @@ Substring_assign_shortexon_prob (T shortexon) {
if (shortexon->chimera_knownp == false) {
/* Prob1 already assigned */
- } else if (shortexon->plusp == shortexon->chimera_sensep) {
- shortexon->chimera_prob = Maxent_hr_acceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+
+ } else if (shortexon->chimera_sensedir == SENSE_FORWARD) {
+ if (shortexon->plusp == true) {
+ shortexon->chimera_prob = Maxent_hr_acceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+ } else {
+ shortexon->chimera_prob = Maxent_hr_antiacceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+ }
+
+ } else if (shortexon->chimera_sensedir == SENSE_ANTI) {
+ if (shortexon->plusp == true) {
+ shortexon->chimera_prob = Maxent_hr_antiacceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+ } else {
+ shortexon->chimera_prob = Maxent_hr_acceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+ }
+
} else {
- shortexon->chimera_prob = Maxent_hr_antiacceptor_prob(shortexon->chimera_modelpos,shortexon->chroffset);
+ abort();
}
-
if (shortexon->chimera_knownp_2 == false) {
/* Prob2 already assigned */
- } else if (shortexon->plusp == shortexon->chimera_sensep) {
- shortexon->chimera_prob_2 = Maxent_hr_donor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+
+ } else if (shortexon->chimera_sensedir == SENSE_FORWARD) {
+ if (shortexon->plusp == true) {
+ shortexon->chimera_prob_2 = Maxent_hr_donor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+ } else {
+ shortexon->chimera_prob_2 = Maxent_hr_antidonor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+ }
+
+ } else if (shortexon->chimera_sensedir == SENSE_ANTI) {
+ if (shortexon->plusp == true) {
+ shortexon->chimera_prob_2 = Maxent_hr_antidonor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+ } else {
+ shortexon->chimera_prob_2 = Maxent_hr_donor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+ }
+
} else {
- shortexon->chimera_prob_2 = Maxent_hr_antidonor_prob(shortexon->chimera_modelpos_2,shortexon->chroffset);
+ abort();
}
return;
@@ -2962,7 +3622,7 @@ descending_pos_cmp (const void *a, const void *b) {
List_T
Substring_sort_chimera_halves (List_T hitlist, bool ascendingp) {
- List_T sorted = NULL, p;
+ List_T sorted = NULL;
T x, *hits;
int n, i, j;
bool *eliminate;
@@ -3014,7 +3674,7 @@ Substring_sort_chimera_halves (List_T hitlist, bool ascendingp) {
static void
-print_snp_labels (FILE *fp, T this, Shortread_T queryseq) {
+print_snp_labels (Filestring_T fp, T this, Shortread_T queryseq) {
int *snps, nsnps, querypos, i;
char *label, *seq1, *seq2;
bool allocp, printp = false;
@@ -3033,7 +3693,7 @@ print_snp_labels (FILE *fp, T this, Shortread_T queryseq) {
/*sortp*/false);
}
- fprintf(fp,",snps:");
+ FPRINTF(fp,",snps:");
seq1 = Shortread_fullpointer_uc(queryseq);
if (this->genomic_bothdiff == NULL) {
@@ -3068,10 +3728,10 @@ print_snp_labels (FILE *fp, T this, Shortread_T queryseq) {
if (isupper(seq2[querypos]) && seq1[querypos] != seq2[querypos]) {
label = IIT_label(snps_iit,snps[i],&allocp);
if (printp) {
- fprintf(fp,"|");
+ FPRINTF(fp,"|");
}
- fprintf(fp,"%d@",querypos+1);
- fprintf(fp,"%s",label);
+ FPRINTF(fp,"%d@",querypos+1);
+ FPRINTF(fp,"%s",label);
printp = true;
if (allocp) FREE(label);
}
@@ -3094,10 +3754,10 @@ print_snp_labels (FILE *fp, T this, Shortread_T queryseq) {
if (isupper(seq2[querypos]) && seq1[querypos] != seq2[querypos]) {
label = IIT_label(snps_iit,snps[i],&allocp);
if (printp) {
- fprintf(fp,"|");
+ FPRINTF(fp,"|");
}
- fprintf(fp,"%d@",querypos+1);
- fprintf(fp,"%s",label);
+ FPRINTF(fp,"%d@",querypos+1);
+ FPRINTF(fp,"%s",label);
printp = true;
if (allocp) FREE(label);
}
@@ -3166,7 +3826,7 @@ Substring_contains_known_splicesite (T this) {
/* Has a copy in pair.c */
static void
-print_splicesite_labels (FILE *fp, T this, int typeint, int chimera_pos, char *tag) {
+print_splicesite_labels (Filestring_T fp, T this, int typeint, int chimera_pos, char *tag) {
Chrpos_T splicesitepos;
int *splicesites, nsplicesites, i;
char *label;
@@ -3191,14 +3851,14 @@ print_splicesite_labels (FILE *fp, T this, int typeint, int chimera_pos, char *t
this->chrnum,splicesitepos,splicesitepos+1U,typeint);
#endif
} else {
- fprintf(fp,",%s:",tag);
+ FPRINTF(fp,",%s:",tag);
label = IIT_label(splicesites_iit,splicesites[0],&allocp);
- fprintf(fp,"%s",label);
+ FPRINTF(fp,"%s",label);
if (allocp) FREE(label);
for (i = 1; i < nsplicesites; i++) {
label = IIT_label(splicesites_iit,splicesites[i],&allocp);
- fprintf(fp,"|%s",label);
+ FPRINTF(fp,"|%s",label);
if (allocp) FREE(label);
}
FREE(splicesites);
@@ -3273,6 +3933,46 @@ Substring_intragenic_splice_p (Genomicpos_T splicedistance, Chrnum_T chrnum,
#endif
+Chrpos_T
+Substring_compute_chrpos (T this, int hardclip_low, bool hide_soft_clips_p) {
+ Chrpos_T chrpos;
+
+ if (hide_soft_clips_p == true) {
+ if (this->plusp == true) {
+ /* Add 1 to report in 1-based coordinates */
+ chrpos = this->genomicstart - this->chroffset + 1U;
+ chrpos += hardclip_low;
+
+ } else {
+ /* Add 1 to report in 1-based coordinates */
+ chrpos = this->genomicend - this->chroffset + 1U;
+ chrpos += hardclip_low;
+ }
+
+ } else {
+ if (this->plusp == true) {
+ chrpos = this->genomicstart - this->chroffset + 1U;
+ if (this->querystart > hardclip_low) {
+ chrpos += this->querystart; /* not querystart_orig */
+ } else {
+ chrpos += hardclip_low;
+ }
+
+ } else {
+ chrpos = this->genomicend - this->chroffset + 1U;
+ if (this->querylength - this->queryend > hardclip_low) {
+ chrpos += this->querylength - this->queryend; /* not queryend_orig */
+ } else {
+ chrpos += hardclip_low;
+ }
+ }
+ }
+
+ return chrpos;
+}
+
+
+
/* Taken from NCBI Blast 2.2.29, algo/blast/core/blast_stat.c */
/* Karlin-Altschul formula: m n exp(-lambda * S + log k) = k m n exp(-lambda * S) */
/* Also in pair.c */
@@ -3300,14 +4000,14 @@ blast_bitscore (int alignlength, int nmismatches) {
void
-Substring_print_m8 (FILE *fp, T substring, Shortread_T headerseq, char *acc_suffix,
+Substring_print_m8 (Filestring_T fp, T substring, Shortread_T headerseq, char *acc_suffix,
char *chr, bool invertp) {
double identity;
int alignlength_trim;
- fprintf(fp,"%s%s",Shortread_accession(headerseq),acc_suffix); /* field 0: accession */
+ FPRINTF(fp,"%s%s",Shortread_accession(headerseq),acc_suffix); /* field 0: accession */
- fprintf(fp,"\t%s",chr); /* field 1: chr */
+ FPRINTF(fp,"\t%s",chr); /* field 1: chr */
/* field 2: identity */
if (substring->plusp == true) {
@@ -3317,45 +4017,45 @@ Substring_print_m8 (FILE *fp, T substring, Shortread_T headerseq, char *acc_suff
}
identity = (double) (alignlength_trim - substring->nmismatches_bothdiff)/(double) alignlength_trim;
- fprintf(fp,"\t%.1f",100.0*identity);
+ FPRINTF(fp,"\t%.1f",100.0*identity);
- fprintf(fp,"\t%d",alignlength_trim); /* field 3: query length */
+ FPRINTF(fp,"\t%d",alignlength_trim); /* field 3: query length */
- fprintf(fp,"\t%d",substring->nmismatches_bothdiff); /* field 4: nmismatches */
+ FPRINTF(fp,"\t%d",substring->nmismatches_bothdiff); /* field 4: nmismatches */
- fprintf(fp,"\t0"); /* field 5: gap openings */
+ FPRINTF(fp,"\t0"); /* field 5: gap openings */
- fprintf(fp,"\t%d",substring->querystart + 1); /* field 6: query start */
+ FPRINTF(fp,"\t%d",substring->querystart + 1); /* field 6: query start */
- fprintf(fp,"\t%d",substring->queryend); /* field 7: query end */
+ FPRINTF(fp,"\t%d",substring->queryend); /* field 7: query end */
/* fields 8 and 9: chr start and end */
if (substring->plusp == true) {
if (invertp == false) {
- fprintf(fp,"\t%u\t%u",substring->alignstart_trim - substring->chroffset + 1U,
+ FPRINTF(fp,"\t%u\t%u",substring->alignstart_trim - substring->chroffset + 1U,
substring->alignend_trim - substring->chroffset);
} else {
- fprintf(fp,"\t%u\t%u",substring->alignend_trim - substring->chroffset,
+ FPRINTF(fp,"\t%u\t%u",substring->alignend_trim - substring->chroffset,
substring->alignstart_trim - substring->chroffset + 1U);
}
} else {
if (invertp == false) {
- fprintf(fp,"\t%u\t%u",substring->alignstart_trim - substring->chroffset,
+ FPRINTF(fp,"\t%u\t%u",substring->alignstart_trim - substring->chroffset,
substring->alignend_trim - substring->chroffset + 1U);
} else {
- fprintf(fp,"\t%u\t%u",substring->alignend_trim - substring->chroffset + 1U,
+ FPRINTF(fp,"\t%u\t%u",substring->alignend_trim - substring->chroffset + 1U,
substring->alignstart_trim - substring->chroffset);
}
}
/* field 10: E value */
- fprintf(fp,"\t%.2g",blast_evalue(alignlength_trim,substring->nmismatches_bothdiff));
+ FPRINTF(fp,"\t%.2g",blast_evalue(alignlength_trim,substring->nmismatches_bothdiff));
/* field 11: bit score */
- fprintf(fp,"\t%.1f",blast_bitscore(alignlength_trim,substring->nmismatches_bothdiff));
+ FPRINTF(fp,"\t%.1f",blast_bitscore(alignlength_trim,substring->nmismatches_bothdiff));
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
@@ -3363,11 +4063,11 @@ Substring_print_m8 (FILE *fp, T substring, Shortread_T headerseq, char *acc_suff
static void
-print_forward (FILE *fp, char *string, int n) {
+print_forward (Filestring_T fp, char *string, int n) {
int i;
for (i = 0; i < n; i++) {
- fprintf(fp,"%c",string[i]);
+ FPRINTF(fp,"%c",string[i]);
}
return;
}
@@ -3376,11 +4076,11 @@ print_forward (FILE *fp, char *string, int n) {
static void
-print_lc (FILE *fp, char *string, int n) {
+print_lc (Filestring_T fp, char *string, int n) {
int i;
for (i = 0; i < n; i++) {
- fprintf(fp,"%c",(char) tolower(string[i]));
+ FPRINTF(fp,"%c",(char) tolower(string[i]));
}
return;
}
@@ -3388,21 +4088,21 @@ print_lc (FILE *fp, char *string, int n) {
static void
-print_revcomp (FILE *fp, char *nt, int len) {
+print_revcomp (Filestring_T fp, char *nt, int len) {
int i;
for (i = len-1; i >= 0; --i) {
- fprintf(fp,"%c",complCode[(int) nt[i]]);
+ FPRINTF(fp,"%c",complCode[(int) nt[i]]);
}
return;
}
static void
-print_revcomp_lc (FILE *fp, char *nt, int len) {
+print_revcomp_lc (Filestring_T fp, char *nt, int len) {
int i;
for (i = len-1; i >= 0; --i) {
- fprintf(fp,"%c",(char) tolower(complCode[(int) nt[i]]));
+ FPRINTF(fp,"%c",(char) tolower(complCode[(int) nt[i]]));
}
return;
}
@@ -3410,7 +4110,7 @@ print_revcomp_lc (FILE *fp, char *nt, int len) {
static void
-print_genomic (FILE *fp, T substring, char *deletion, int deletionlength, bool invertp,
+print_genomic (Filestring_T fp, T substring, char *deletion, int deletionlength, bool invertp,
Shortread_T queryseq) {
int i;
@@ -3434,10 +4134,10 @@ print_genomic (FILE *fp, T substring, char *deletion, int deletionlength, bool i
}
for (i = 0; i < Shortread_choplength(queryseq); i++) {
- fprintf(fp,"*");
+ FPRINTF(fp,"*");
}
- fprintf(fp,"\t");
- fprintf(fp,"%d..%d",1 + substring->querystart,substring->queryend);
+ FPRINTF(fp,"\t");
+ FPRINTF(fp,"%d..%d",1 + substring->querystart,substring->queryend);
} else {
if (substring->genomic_bothdiff == NULL) {
@@ -3459,10 +4159,10 @@ print_genomic (FILE *fp, T substring, char *deletion, int deletionlength, bool i
print_revcomp(fp,substring->genomic_bothdiff,substring->querystart);
}
for (i = 0; i < Shortread_choplength(queryseq); i++) {
- fprintf(fp,"*");
+ FPRINTF(fp,"*");
}
- fprintf(fp,"\t");
- fprintf(fp,"%d..%d",1 + substring->querylength - substring->queryend,
+ FPRINTF(fp,"\t");
+ FPRINTF(fp,"%d..%d",1 + substring->querylength - substring->queryend,
substring->querylength - substring->querystart);
}
return;
@@ -3470,22 +4170,22 @@ print_genomic (FILE *fp, T substring, char *deletion, int deletionlength, bool i
static void
-print_coordinates (FILE *fp, T substring, char *chr, bool invertp) {
+print_coordinates (Filestring_T fp, T substring, char *chr, bool invertp) {
if (substring->plusp == true) {
if (invertp == false) {
- fprintf(fp,"+%s:%u..%u",chr,substring->alignstart_trim - substring->chroffset + 1U,
+ FPRINTF(fp,"+%s:%u..%u",chr,substring->alignstart_trim - substring->chroffset + 1U,
substring->alignend_trim - substring->chroffset);
} else {
- fprintf(fp,"-%s:%u..%u",chr,substring->alignend_trim - substring->chroffset,
+ FPRINTF(fp,"-%s:%u..%u",chr,substring->alignend_trim - substring->chroffset,
substring->alignstart_trim - substring->chroffset + 1U);
}
} else {
if (invertp == false) {
- fprintf(fp,"-%s:%u..%u",chr,substring->alignstart_trim - substring->chroffset,
+ FPRINTF(fp,"-%s:%u..%u",chr,substring->alignstart_trim - substring->chroffset,
substring->alignend_trim - substring->chroffset + 1U);
} else {
- fprintf(fp,"+%s:%u..%u",chr,substring->alignend_trim - substring->chroffset + 1U,
+ FPRINTF(fp,"+%s:%u..%u",chr,substring->alignend_trim - substring->chroffset + 1U,
substring->alignstart_trim - substring->chroffset);
}
}
@@ -3496,47 +4196,229 @@ print_coordinates (FILE *fp, T substring, char *chr, bool invertp) {
void
-Substring_print_single (FILE *fp, T substring, Shortread_T queryseq,
+Substring_print_alignment (Filestring_T fp, Junction_T pre_junction, T substring, Junction_T post_junction,
+ Shortread_T queryseq, Genome_T genome, char *chr, bool invertp) {
+ char *deletion_string;
+ int deletion_length;
+ Junctiontype_T type1, type2;
+ Chrpos_T splice_distance_1, splice_distance_2;
+
+ if (post_junction == NULL) {
+ deletion_string = (char *) NULL;
+ deletion_length = 0;
+ } else if (Junction_type(post_junction) != DEL_JUNCTION) {
+ deletion_string = (char *) NULL;
+ deletion_length = 0;
+ } else {
+ deletion_string = Junction_deletion_string(post_junction,genome,substring->plusp);
+ deletion_length = Junction_nindels(post_junction);
+ }
+
+ print_genomic(fp,substring,deletion_string,deletion_length,invertp,queryseq);
+ FREE(deletion_string);
+ FPRINTF(fp,"\t");
+ print_coordinates(fp,substring,chr,invertp);
+
+ FPRINTF(fp,"\t");
+ if (pre_junction == NULL) {
+ type1 = NO_JUNCTION;
+ if (invertp == false) {
+ FPRINTF(fp,"start:%d",substring->trim_left); /* Previously distinguished between start and term */
+ } else {
+ FPRINTF(fp,"start:%d",substring->trim_right); /* Previously distinguished between start and term */
+ }
+ } else if ((type1 = Junction_type(pre_junction)) == INS_JUNCTION) {
+ FPRINTF(fp,"ins:%d",Junction_nindels(pre_junction));
+ } else if (type1 == DEL_JUNCTION) {
+ FPRINTF(fp,"del:%d",Junction_nindels(pre_junction));
+ } else if (type1 == SPLICE_JUNCTION) {
+ if (invertp == false) {
+ if (Junction_sensedir(pre_junction) == SENSE_ANTI) {
+ FPRINTF(fp,"donor:%.2f",Junction_donor_prob(pre_junction));
+ } else {
+ FPRINTF(fp,"acceptor:%.2f",Junction_acceptor_prob(pre_junction));
+ }
+ } else {
+ if (Junction_sensedir(pre_junction) == SENSE_ANTI) {
+ FPRINTF(fp,"acceptor:%.2f",Junction_acceptor_prob(pre_junction));
+ } else {
+ FPRINTF(fp,"donor:%.2f",Junction_donor_prob(pre_junction));
+ }
+ }
+ } else if (type1 == CHIMERA_JUNCTION) {
+ FPRINTF(fp,"distant:%u",Junction_splice_distance(pre_junction));
+ } else {
+ abort();
+ }
+
+ FPRINTF(fp,"..");
+
+ if (post_junction == NULL) {
+ type2 = NO_JUNCTION;
+ if (invertp == false) {
+ FPRINTF(fp,"end:%d",substring->trim_right);
+ } else {
+ FPRINTF(fp,"end:%d",substring->trim_left);
+ }
+ } else if ((type2 = Junction_type(post_junction)) == INS_JUNCTION) {
+ FPRINTF(fp,"ins:%d",Junction_nindels(post_junction));
+ } else if (type2 == DEL_JUNCTION) {
+ FPRINTF(fp,"del:%d",Junction_nindels(post_junction));
+ } else if (type2 == SPLICE_JUNCTION) {
+ if (invertp == false) {
+ if (Junction_sensedir(post_junction) == SENSE_ANTI) {
+ FPRINTF(fp,"acceptor:%.2f",Junction_acceptor_prob(post_junction));
+ } else {
+ FPRINTF(fp,"donor:%.2f",Junction_donor_prob(post_junction));
+ }
+ } else {
+ if (Junction_sensedir(post_junction) == SENSE_ANTI) {
+ FPRINTF(fp,"donor:%.2f",Junction_donor_prob(post_junction));
+ } else {
+ FPRINTF(fp,"acceptor:%.2f",Junction_acceptor_prob(post_junction));
+ }
+ }
+ } else if (type2 == CHIMERA_JUNCTION) {
+ FPRINTF(fp,"distant:%u",Junction_splice_distance(post_junction));
+ } else {
+ abort();
+ }
+
+ FPRINTF(fp,",matches:%d,sub:%d",substring->nmatches,substring->nmismatches_bothdiff);
+ if (print_nsnpdiffs_p) {
+ FPRINTF(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
+ if (print_snplabels_p && substring->nmismatches_refdiff > substring->nmismatches_bothdiff) {
+ print_snp_labels(fp,substring,queryseq);
+ }
+ }
+
+ if (type1 == SPLICE_JUNCTION && type2 == SPLICE_JUNCTION) {
+ if (invertp == false) {
+ if (Junction_sensedir(pre_junction) == SENSE_FORWARD) {
+ FPRINTF(fp,",dir:sense");
+ } else if (Junction_sensedir(pre_junction) == SENSE_ANTI) {
+ FPRINTF(fp,",dir:antisense");
+ } else {
+ FPRINTF(fp,",dir:unknown");
+ }
+ } else {
+ if (Junction_sensedir(pre_junction) == SENSE_FORWARD) {
+ FPRINTF(fp,",dir:antisense");
+ } else if (Junction_sensedir(pre_junction) == SENSE_ANTI) {
+ FPRINTF(fp,",dir:sense");
+ } else {
+ FPRINTF(fp,",dir:unknown");
+ }
+ }
+ splice_distance_1 = Junction_splice_distance(pre_junction);
+ splice_distance_2 = Junction_splice_distance(post_junction);
+ if (splice_distance_1 == 0 && splice_distance_2 == 0) {
+ /* Skip */
+ } else if (splice_distance_1 == 0) {
+ FPRINTF(fp,",splice_type:consistent");
+ FPRINTF(fp,",splice_dist_2:%u",splice_distance_2);
+ } else if (splice_distance_2 == 0) {
+ FPRINTF(fp,",splice_type:consistent");
+ FPRINTF(fp,",splice_dist_1:%u",splice_distance_1);
+ } else {
+ FPRINTF(fp,",splice_type:consistent");
+ FPRINTF(fp,",splice_dist_1:%u",splice_distance_1);
+ FPRINTF(fp,",splice_dist_2:%u",splice_distance_2);
+ }
+
+ } else if (type1 == SPLICE_JUNCTION) {
+ if (invertp == false) {
+ if (Junction_sensedir(pre_junction) == SENSE_FORWARD) {
+ FPRINTF(fp,",dir:sense");
+ } else if (Junction_sensedir(pre_junction) == SENSE_ANTI) {
+ FPRINTF(fp,",dir:antisense");
+ } else {
+ FPRINTF(fp,",dir:unknown");
+ }
+ } else {
+ if (Junction_sensedir(pre_junction) == SENSE_FORWARD) {
+ FPRINTF(fp,",dir:antisense");
+ } else if (Junction_sensedir(pre_junction) == SENSE_ANTI) {
+ FPRINTF(fp,",dir:sense");
+ } else {
+ FPRINTF(fp,",dir:unknown");
+ }
+ }
+ if ((splice_distance_1 = Junction_splice_distance(pre_junction)) > 0) {
+ FPRINTF(fp,",splice_type:consistent");
+ FPRINTF(fp,",splice_dist_1:%u",splice_distance_1);
+ }
+
+ } else if (type2 == SPLICE_JUNCTION) {
+ if (invertp == false) {
+ if (Junction_sensedir(post_junction) == SENSE_FORWARD) {
+ FPRINTF(fp,",dir:sense");
+ } else if (Junction_sensedir(post_junction) == SENSE_ANTI) {
+ FPRINTF(fp,",dir:antisense");
+ } else {
+ FPRINTF(fp,",dir:unknown");
+ }
+ } else {
+ if (Junction_sensedir(post_junction) == SENSE_FORWARD) {
+ FPRINTF(fp,",dir:antisense");
+ } else if (Junction_sensedir(post_junction) == SENSE_ANTI) {
+ FPRINTF(fp,",dir:sense");
+ } else {
+ FPRINTF(fp,",dir:unknown");
+ }
+ }
+ if ((splice_distance_2 = Junction_splice_distance(post_junction)) > 0) {
+ FPRINTF(fp,",splice_type:consistent");
+ FPRINTF(fp,",splice_dist_2:%u",splice_distance_2);
+ }
+ }
+
+ return;
+}
+
+
+void
+Substring_print_single (Filestring_T fp, T substring, Shortread_T queryseq,
char *chr, bool invertp) {
print_genomic(fp,substring,/*deletion*/(char *) NULL,/*deletionlength*/0,invertp,queryseq);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
print_coordinates(fp,substring,chr,invertp);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
if (invertp == false) {
switch (substring->start_endtype) {
- case END: fprintf(fp,"start:%d",substring->trim_left); break;
- case TERM: fprintf(fp,"term:%d",substring->trim_left); break;
+ case END: FPRINTF(fp,"start:%d",substring->trim_left); break;
+ case TERM: FPRINTF(fp,"term:%d",substring->trim_left); break;
default: fprintf(stderr,"start_endtype is %d\n",substring->start_endtype); abort(); break;
}
} else {
switch (substring->end_endtype) {
- case END: fprintf(fp,"start:%d",substring->trim_right); break;
- case TERM: fprintf(fp,"term:%d",substring->trim_right); break;
+ case END: FPRINTF(fp,"start:%d",substring->trim_right); break;
+ case TERM: FPRINTF(fp,"term:%d",substring->trim_right); break;
default: fprintf(stderr,"end_endtype is %d\n",substring->end_endtype); abort(); break;
}
}
- fprintf(fp,"..");
+ FPRINTF(fp,"..");
if (invertp == false) {
switch (substring->end_endtype) {
- case END: fprintf(fp,"end:%d",substring->trim_right); break;
- case TERM: fprintf(fp,"term:%d",substring->trim_right); break;
+ case END: FPRINTF(fp,"end:%d",substring->trim_right); break;
+ case TERM: FPRINTF(fp,"term:%d",substring->trim_right); break;
default: fprintf(stderr,"end_endtype is %d\n",substring->end_endtype); abort(); break;
}
} else {
switch (substring->start_endtype) {
- case END: fprintf(fp,"end:%d",substring->trim_left); break;
- case TERM: fprintf(fp,"term:%d",substring->trim_left); break;
+ case END: FPRINTF(fp,"end:%d",substring->trim_left); break;
+ case TERM: FPRINTF(fp,"term:%d",substring->trim_left); break;
default: fprintf(stderr,"start_endtype is %d\n",substring->start_endtype); abort(); break;
}
}
- fprintf(fp,",matches:%d,sub:%d",substring->nmatches,substring->nmismatches_bothdiff);
+ FPRINTF(fp,",matches:%d,sub:%d",substring->nmatches,substring->nmismatches_bothdiff);
if (print_nsnpdiffs_p) {
- fprintf(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
+ FPRINTF(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
if (print_snplabels_p && substring->nmismatches_refdiff > substring->nmismatches_bothdiff) {
print_snp_labels(fp,substring,queryseq);
}
@@ -3547,7 +4429,7 @@ Substring_print_single (FILE *fp, T substring, Shortread_T queryseq,
void
-Substring_print_insertion_1 (FILE *fp, T substring1, T substring2, int nindels,
+Substring_print_insertion_1 (Filestring_T fp, T substring1, T substring2, int nindels,
Shortread_T queryseq, char *chr, bool invertp) {
T substring;
@@ -3562,21 +4444,21 @@ Substring_print_insertion_1 (FILE *fp, T substring1, T substring2, int nindels,
queryseq);
}
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
print_coordinates(fp,substring,chr,invertp);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
if (invertp == false) {
- fprintf(fp,"start:%d..ins:%d,matches:%d,sub:%d",
+ FPRINTF(fp,"start:%d..ins:%d,matches:%d,sub:%d",
substring->trim_left,nindels,substring->nmatches,substring->nmismatches_bothdiff);
} else {
- fprintf(fp,"start:%d..ins:%d,matches:%d,sub:%d",
+ FPRINTF(fp,"start:%d..ins:%d,matches:%d,sub:%d",
substring->trim_right,nindels,substring->nmatches,substring->nmismatches_bothdiff);
}
if (print_nsnpdiffs_p) {
- fprintf(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
+ FPRINTF(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
if (print_snplabels_p && substring->nmismatches_refdiff > substring->nmismatches_bothdiff) {
print_snp_labels(fp,substring,queryseq);
}
@@ -3587,7 +4469,7 @@ Substring_print_insertion_1 (FILE *fp, T substring1, T substring2, int nindels,
}
void
-Substring_print_insertion_2 (FILE *fp, T substring1, T substring2, int nindels,
+Substring_print_insertion_2 (Filestring_T fp, T substring1, T substring2, int nindels,
Shortread_T queryseq, char *chr, bool invertp) {
T substring;
@@ -3602,21 +4484,21 @@ Substring_print_insertion_2 (FILE *fp, T substring1, T substring2, int nindels,
queryseq);
}
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
print_coordinates(fp,substring,chr,invertp);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
if (invertp == false) {
- fprintf(fp,"ins:%d..end:%d,matches:%d,sub:%d",
+ FPRINTF(fp,"ins:%d..end:%d,matches:%d,sub:%d",
nindels,substring->trim_right,substring->nmatches,substring->nmismatches_bothdiff);
} else {
- fprintf(fp,"ins:%d..end:%d,matches:%d,sub:%d",
+ FPRINTF(fp,"ins:%d..end:%d,matches:%d,sub:%d",
nindels,substring->trim_left,substring->nmatches,substring->nmismatches_bothdiff);
}
if (print_nsnpdiffs_p) {
- fprintf(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
+ FPRINTF(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
if (print_snplabels_p && substring->nmismatches_refdiff > substring->nmismatches_bothdiff) {
print_snp_labels(fp,substring,queryseq);
}
@@ -3627,7 +4509,7 @@ Substring_print_insertion_2 (FILE *fp, T substring1, T substring2, int nindels,
void
-Substring_print_deletion_1 (FILE *fp, T substring1, T substring2, int nindels,
+Substring_print_deletion_1 (Filestring_T fp, T substring1, T substring2, int nindels,
char *deletion, Shortread_T queryseq, char *chr,
bool invertp) {
T substring;
@@ -3640,21 +4522,21 @@ Substring_print_deletion_1 (FILE *fp, T substring1, T substring2, int nindels,
print_genomic(fp,substring2,deletion,nindels,/*invertp*/true,queryseq);
}
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
print_coordinates(fp,substring,chr,invertp);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
if (invertp == false) {
- fprintf(fp,"start:%d..del:%d,matches:%d,sub:%d",
+ FPRINTF(fp,"start:%d..del:%d,matches:%d,sub:%d",
substring->trim_left,nindels,substring->nmatches,substring->nmismatches_bothdiff);
} else {
- fprintf(fp,"start:%d..del:%d,matches:%d,sub:%d",
+ FPRINTF(fp,"start:%d..del:%d,matches:%d,sub:%d",
substring->trim_right,nindels,substring->nmatches,substring->nmismatches_bothdiff);
}
if (print_nsnpdiffs_p) {
- fprintf(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
+ FPRINTF(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
if (print_snplabels_p && substring->nmismatches_refdiff > substring->nmismatches_bothdiff) {
print_snp_labels(fp,substring,queryseq);
}
@@ -3665,7 +4547,7 @@ Substring_print_deletion_1 (FILE *fp, T substring1, T substring2, int nindels,
void
-Substring_print_deletion_2 (FILE *fp, T substring1, T substring2, int nindels,
+Substring_print_deletion_2 (Filestring_T fp, T substring1, T substring2, int nindels,
Shortread_T queryseq, char *chr, bool invertp) {
T substring;
@@ -3680,20 +4562,20 @@ Substring_print_deletion_2 (FILE *fp, T substring1, T substring2, int nindels,
queryseq);
}
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
print_coordinates(fp,substring,chr,invertp);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
if (invertp == false) {
- fprintf(fp,"del:%d..end:%d,matches:%d,sub:%d",
+ FPRINTF(fp,"del:%d..end:%d,matches:%d,sub:%d",
nindels,substring->trim_right,substring->nmatches,substring->nmismatches_bothdiff);
} else {
- fprintf(fp,"del:%d..end:%d,matches:%d,sub:%d",
+ FPRINTF(fp,"del:%d..end:%d,matches:%d,sub:%d",
nindels,substring->trim_left,substring->nmatches,substring->nmismatches_bothdiff);
}
if (print_nsnpdiffs_p) {
- fprintf(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
+ FPRINTF(fp,"+%d=%d",substring->nmismatches_refdiff - substring->nmismatches_bothdiff,substring->nmismatches_refdiff);
if (print_snplabels_p && substring->nmismatches_refdiff > substring->nmismatches_bothdiff) {
print_snp_labels(fp,substring,queryseq);
}
@@ -3705,59 +4587,61 @@ Substring_print_deletion_2 (FILE *fp, T substring1, T substring2, int nindels,
/* This logic used in splice part of SAM_print */
static void
-print_splice_distance (FILE *fp, T donor, T acceptor, Chrpos_T distance, bool sensep, char *tag) {
+print_splice_distance (Filestring_T fp, T donor, T acceptor, Chrpos_T distance, int sensedir, char *tag) {
bool normalp = true;
if (donor == NULL || acceptor == NULL) {
/* Don't print anything */
} else if (distance == 0U) {
- fprintf(fp,",splice_type:%s",TRANSLOCATION_TEXT);
+ FPRINTF(fp,",splice_type:%s",TRANSLOCATION_TEXT);
} else {
if (donor->plusp != acceptor->plusp) {
- fprintf(fp,",splice_type:%s",INVERSION_TEXT);
+ FPRINTF(fp,",splice_type:%s",INVERSION_TEXT);
normalp = false;
} else if (donor->plusp == true) {
- if (sensep == true) {
+ if (sensedir != SENSE_ANTI) {
+ /* SENSE_FORWARD or SENSE_NULL */
if (acceptor->genomicstart < donor->genomicstart) {
- fprintf(fp,",splice_type:%s",SCRAMBLE_TEXT);
+ FPRINTF(fp,",splice_type:%s",SCRAMBLE_TEXT);
normalp = false;
}
} else {
if (donor->genomicstart < acceptor->genomicstart) {
- fprintf(fp,",splice_type:%s",SCRAMBLE_TEXT);
+ FPRINTF(fp,",splice_type:%s",SCRAMBLE_TEXT);
normalp = false;
}
}
} else {
- if (sensep == true) {
+ if (sensedir != SENSE_ANTI) {
+ /* SENSE_FORWARD or SENSE_NULL */
if (donor->genomicstart < acceptor->genomicstart) {
- fprintf(fp,",splice_type:%s",SCRAMBLE_TEXT);
+ FPRINTF(fp,",splice_type:%s",SCRAMBLE_TEXT);
normalp = false;
}
} else {
if (acceptor->genomicstart < donor->genomicstart) {
- fprintf(fp,",splice_type:%s",SCRAMBLE_TEXT);
+ FPRINTF(fp,",splice_type:%s",SCRAMBLE_TEXT);
normalp = false;
}
}
}
if (normalp == true) {
- fprintf(fp,",splice_type:%s",CONSISTENT_TEXT);
+ FPRINTF(fp,",splice_type:%s",CONSISTENT_TEXT);
}
- fprintf(fp,",%s:%u",tag,distance);
+ FPRINTF(fp,",%s:%u",tag,distance);
}
return;
}
static void
-print_shortexon_splice_distances (FILE *fp, Chrpos_T distance1, Chrpos_T distance2) {
+print_shortexon_splice_distances (Filestring_T fp, Chrpos_T distance1, Chrpos_T distance2) {
if (distance1 == 0U || distance2 == 0U) {
/* Skip */
} else {
- fprintf(fp,",splice_type:%s",CONSISTENT_TEXT);
- fprintf(fp,",splice_dist_1:%u",distance1);
- fprintf(fp,",splice_dist_2:%u",distance2);
+ FPRINTF(fp,",splice_type:%s",CONSISTENT_TEXT);
+ FPRINTF(fp,",splice_dist_1:%u",distance1);
+ FPRINTF(fp,",splice_dist_2:%u",distance2);
}
return;
@@ -3766,7 +4650,7 @@ print_shortexon_splice_distances (FILE *fp, Chrpos_T distance1, Chrpos_T distanc
void
-Substring_print_donor (FILE *fp, T donor, bool sensep, bool invertp, Shortread_T queryseq,
+Substring_print_donor (Filestring_T fp, T donor, int sensedir, bool invertp, Shortread_T queryseq,
Univ_IIT_T chromosome_iit, T acceptor, Chrpos_T chimera_distance) {
char *chr, *label_tag, *splice_dist_tag;
bool allocp;
@@ -3779,50 +4663,72 @@ Substring_print_donor (FILE *fp, T donor, bool sensep, bool invertp, Shortread_T
print_genomic(fp,donor,/*deletion*/NULL,/*deletionlength*/0,invertp,queryseq);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
chr = Univ_IIT_label(chromosome_iit,donor->chrnum,&allocp);
print_coordinates(fp,donor,chr,invertp);
/* printf("donor chimera_pos is %d\n",donor->chimera_pos); */
- fprintf(fp,"\t");
- if (sensep == true && invertp == false) {
- fprintf(fp,"start:%d..donor:%.2f",donor->trim_left,donor->chimera_prob);
- label_tag = "label_2";
- splice_dist_tag = "splice_dist_2";
- } else if (sensep == true && invertp == true) {
- fprintf(fp,"donor:%.2f..end:%d",donor->chimera_prob,donor->trim_left);
- label_tag = "label_1";
- splice_dist_tag = "splice_dist_1";
- } else if (sensep == false && invertp == false) {
- fprintf(fp,"donor:%.2f..end:%d",donor->chimera_prob,donor->trim_right);
- label_tag = "label_1";
- splice_dist_tag = "splice_dist_1";
- } else if (sensep == false && invertp == true) {
- fprintf(fp,"start:%d..donor:%.2f",donor->trim_right,donor->chimera_prob);
- label_tag = "label_2";
- splice_dist_tag = "splice_dist_2";
- }
-
- fprintf(fp,",matches:%d,sub:%d",donor->nmatches,donor->nmismatches_bothdiff);
+ FPRINTF(fp,"\t");
+ if (sensedir == SENSE_FORWARD) {
+ if (invertp == false) {
+ FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_left,donor->chimera_prob);
+ label_tag = "label_2";
+ splice_dist_tag = "splice_dist_2";
+ } else {
+ FPRINTF(fp,"donor:%.2f..end:%d",donor->chimera_prob,donor->trim_left);
+ label_tag = "label_1";
+ splice_dist_tag = "splice_dist_1";
+ }
+ } else if (sensedir == SENSE_ANTI) {
+ if (invertp == false) {
+ FPRINTF(fp,"donor:%.2f..end:%d",donor->chimera_prob,donor->trim_right);
+ label_tag = "label_1";
+ splice_dist_tag = "splice_dist_1";
+ } else {
+ FPRINTF(fp,"start:%d..donor:%.2f",donor->trim_right,donor->chimera_prob);
+ label_tag = "label_2";
+ splice_dist_tag = "splice_dist_2";
+ }
+ } else {
+ /* SENSE_NULL */
+ if (invertp == false) {
+ FPRINTF(fp,"start:%d..splice:%.2f",donor->trim_left,donor->chimera_prob);
+ label_tag = "label_2";
+ splice_dist_tag = "splice_dist_2";
+ } else {
+ FPRINTF(fp,"splice:%.2f..end:%d",donor->chimera_prob,donor->trim_left);
+ label_tag = "label_1";
+ splice_dist_tag = "splice_dist_1";
+ }
+ }
+
+ FPRINTF(fp,",matches:%d,sub:%d",donor->nmatches,donor->nmismatches_bothdiff);
if (print_nsnpdiffs_p) {
- fprintf(fp,"+%d=%d",donor->nmismatches_refdiff - donor->nmismatches_bothdiff,donor->nmismatches_refdiff);
+ FPRINTF(fp,"+%d=%d",donor->nmismatches_refdiff - donor->nmismatches_bothdiff,donor->nmismatches_refdiff);
if (print_snplabels_p && donor->nmismatches_refdiff > donor->nmismatches_bothdiff) {
print_snp_labels(fp,donor,queryseq);
}
}
- if (sensep == true && invertp == false) {
- fprintf(fp,",dir:sense");
- } else if (sensep == true && invertp == true) {
- fprintf(fp,",dir:antisense");
- } else if (sensep == false && invertp == false) {
- fprintf(fp,",dir:antisense");
- } else if (sensep == false && invertp == true) {
- fprintf(fp,",dir:sense");
+ if (sensedir == SENSE_FORWARD) {
+ if (invertp == false) {
+ FPRINTF(fp,",dir:sense");
+ } else {
+ FPRINTF(fp,",dir:antisense");
+ }
+ } else if (sensedir == SENSE_ANTI) {
+ if (invertp == false) {
+ FPRINTF(fp,",dir:antisense");
+ } else {
+ FPRINTF(fp,",dir:sense");
+ }
+ } else {
+ /* SENSE_NULL */
+ FPRINTF(fp,",dir:unknown");
}
if (acceptor != NULL) {
- print_splice_distance(fp,donor,acceptor,chimera_distance,sensep,splice_dist_tag);
+ print_splice_distance(fp,donor,acceptor,chimera_distance,sensedir,splice_dist_tag);
}
#ifdef CHECK_KNOWNI
@@ -3851,7 +4757,7 @@ Substring_print_donor (FILE *fp, T donor, bool sensep, bool invertp, Shortread_T
}
void
-Substring_print_acceptor (FILE *fp, T acceptor, bool sensep, bool invertp, Shortread_T queryseq,
+Substring_print_acceptor (Filestring_T fp, T acceptor, int sensedir, bool invertp, Shortread_T queryseq,
Univ_IIT_T chromosome_iit, T donor, Chrpos_T chimera_distance) {
char *chr, *label_tag, *splice_dist_tag;
bool allocp;
@@ -3864,50 +4770,72 @@ Substring_print_acceptor (FILE *fp, T acceptor, bool sensep, bool invertp, Short
print_genomic(fp,acceptor,/*deletion*/NULL,/*deletionlength*/0,invertp,queryseq);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
chr = Univ_IIT_label(chromosome_iit,acceptor->chrnum,&allocp);
print_coordinates(fp,acceptor,chr,invertp);
/* printf("acceptor chimera_pos is %d\n",acceptor->chimera_pos); */
- fprintf(fp,"\t");
- if (sensep == true && invertp == false) {
- fprintf(fp,"acceptor:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_right);
- label_tag = "label_1";
- splice_dist_tag = "splice_dist_1";
- } else if (sensep == true && invertp == true) {
- fprintf(fp,"start:%d..acceptor:%.2f",acceptor->trim_right,acceptor->chimera_prob);
- label_tag = "label_2";
- splice_dist_tag = "splice_dist_2";
- } else if (sensep == false && invertp == false) {
- fprintf(fp,"start:%d..acceptor:%.2f",acceptor->trim_left,acceptor->chimera_prob);
- label_tag = "label_2";
- splice_dist_tag = "splice_dist_2";
- } else if (sensep == false && invertp == true) {
- fprintf(fp,"acceptor:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_left);
- label_tag = "label_1";
- splice_dist_tag = "splice_dist_1";
- }
-
- fprintf(fp,",matches:%d,sub:%d",acceptor->nmatches,acceptor->nmismatches_bothdiff);
+ FPRINTF(fp,"\t");
+ if (sensedir == SENSE_FORWARD) {
+ if (invertp == false) {
+ FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_right);
+ label_tag = "label_1";
+ splice_dist_tag = "splice_dist_1";
+ } else {
+ FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_right,acceptor->chimera_prob);
+ label_tag = "label_2";
+ splice_dist_tag = "splice_dist_2";
+ }
+ } else if (sensedir == SENSE_ANTI) {
+ if (invertp == false) {
+ FPRINTF(fp,"start:%d..acceptor:%.2f",acceptor->trim_left,acceptor->chimera_prob);
+ label_tag = "label_2";
+ splice_dist_tag = "splice_dist_2";
+ } else {
+ FPRINTF(fp,"acceptor:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_left);
+ label_tag = "label_1";
+ splice_dist_tag = "splice_dist_1";
+ }
+ } else {
+ /* SENSE_NULL */
+ if (invertp == false) {
+ FPRINTF(fp,"splice:%.2f..end:%d",acceptor->chimera_prob,acceptor->trim_right);
+ label_tag = "label_1";
+ splice_dist_tag = "splice_dist_1";
+ } else {
+ FPRINTF(fp,"start:%d..splice:%.2f",acceptor->trim_right,acceptor->chimera_prob);
+ label_tag = "label_2";
+ splice_dist_tag = "splice_dist_2";
+ }
+ }
+
+ FPRINTF(fp,",matches:%d,sub:%d",acceptor->nmatches,acceptor->nmismatches_bothdiff);
if (print_nsnpdiffs_p) {
- fprintf(fp,"+%d=%d",acceptor->nmismatches_refdiff - acceptor->nmismatches_bothdiff,acceptor->nmismatches_refdiff);
+ FPRINTF(fp,"+%d=%d",acceptor->nmismatches_refdiff - acceptor->nmismatches_bothdiff,acceptor->nmismatches_refdiff);
if (print_snplabels_p && acceptor->nmismatches_refdiff > acceptor->nmismatches_bothdiff) {
print_snp_labels(fp,acceptor,queryseq);
}
}
- if (sensep == true && invertp == false) {
- fprintf(fp,",dir:sense");
- } else if (sensep == true && invertp == true) {
- fprintf(fp,",dir:antisense");
- } else if (sensep == false && invertp == false) {
- fprintf(fp,",dir:antisense");
- } else if (sensep == false && invertp == true) {
- fprintf(fp,",dir:sense");
+ if (sensedir == SENSE_FORWARD) {
+ if (invertp == false) {
+ FPRINTF(fp,",dir:sense");
+ } else {
+ FPRINTF(fp,",dir:antisense");
+ }
+ } else if (sensedir == SENSE_ANTI) {
+ if (invertp == false) {
+ FPRINTF(fp,",dir:antisense");
+ } else {
+ FPRINTF(fp,",dir:sense");
+ }
+ } else {
+ /* SENSE_NULL */
+ FPRINTF(fp,",dir:unknown");
}
if (donor != NULL) {
- print_splice_distance(fp,donor,acceptor,chimera_distance,sensep,splice_dist_tag);
+ print_splice_distance(fp,donor,acceptor,chimera_distance,sensedir,splice_dist_tag);
}
#ifdef CHECK_KNOWNI
@@ -3939,7 +4867,7 @@ Substring_print_acceptor (FILE *fp, T acceptor, bool sensep, bool invertp, Short
void
-Substring_print_shortexon (FILE *fp, T shortexon, bool sensep, bool invertp, Shortread_T queryseq,
+Substring_print_shortexon (Filestring_T fp, T shortexon, int sensedir, bool invertp, Shortread_T queryseq,
Univ_IIT_T chromosome_iit, Chrpos_T distance1, Chrpos_T distance2) {
char *chr;
bool allocp;
@@ -3978,32 +4906,32 @@ Substring_print_shortexon (FILE *fp, T shortexon, bool sensep, bool invertp, Sho
print_genomic(fp,shortexon,/*deletion*/NULL,/*deletionlength*/0,invertp,queryseq);
- fprintf(fp,"\t");
+ FPRINTF(fp,"\t");
chr = Univ_IIT_label(chromosome_iit,shortexon->chrnum,&allocp);
print_coordinates(fp,shortexon,chr,invertp);
- fprintf(fp,"\t");
- if (sensep == true && invertp == false) {
- fprintf(fp,"acceptor:%.2f..donor:%.2f",shortexon->chimera_prob,shortexon->chimera_prob_2);
- } else if (sensep == true && invertp == true) {
- fprintf(fp,"donor:%.2f..acceptor:%.2f",shortexon->chimera_prob_2,shortexon->chimera_prob);
- } else if (sensep == false && invertp == false) {
- fprintf(fp,"donor:%.2f..acceptor:%.2f",shortexon->chimera_prob_2,shortexon->chimera_prob);
- } else if (sensep == false && invertp == true) {
- fprintf(fp,"acceptor:%.2f..donor:%.2f",shortexon->chimera_prob,shortexon->chimera_prob_2);
+ FPRINTF(fp,"\t");
+ if (sensedir == SENSE_FORWARD && invertp == false) {
+ FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->chimera_prob,shortexon->chimera_prob_2);
+ } else if (sensedir == SENSE_FORWARD && invertp == true) {
+ FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->chimera_prob_2,shortexon->chimera_prob);
+ } else if (sensedir == SENSE_ANTI && invertp == false) {
+ FPRINTF(fp,"donor:%.2f..acceptor:%.2f",shortexon->chimera_prob_2,shortexon->chimera_prob);
+ } else if (sensedir == SENSE_ANTI && invertp == true) {
+ FPRINTF(fp,"acceptor:%.2f..donor:%.2f",shortexon->chimera_prob,shortexon->chimera_prob_2);
}
- fprintf(fp,",matches:%d,sub:%d",shortexon->nmatches,shortexon->nmismatches_bothdiff);
+ FPRINTF(fp,",matches:%d,sub:%d",shortexon->nmatches,shortexon->nmismatches_bothdiff);
if (print_nsnpdiffs_p) {
- fprintf(fp,"+%d=%d",shortexon->nmismatches_refdiff - shortexon->nmismatches_bothdiff,shortexon->nmismatches_refdiff);
+ FPRINTF(fp,"+%d=%d",shortexon->nmismatches_refdiff - shortexon->nmismatches_bothdiff,shortexon->nmismatches_refdiff);
if (print_snplabels_p && shortexon->nmismatches_refdiff > shortexon->nmismatches_bothdiff) {
print_snp_labels(fp,shortexon,queryseq);
}
}
- if (sensep == true && invertp == false) {
- fprintf(fp,",dir:sense");
+ if (sensedir == SENSE_FORWARD && invertp == false) {
+ FPRINTF(fp,",dir:sense");
print_shortexon_splice_distances(fp,distance1,distance2);
if (shortexon->chimera_knownp && splicesites_iit) {
@@ -4015,8 +4943,8 @@ Substring_print_shortexon (FILE *fp, T shortexon, bool sensep, bool invertp, Sho
shortexon->chimera_pos_2,/*tag*/"label_2");
}
- } else if (sensep == true && invertp == true) {
- fprintf(fp,",dir:antisense");
+ } else if (sensedir == SENSE_FORWARD && invertp == true) {
+ FPRINTF(fp,",dir:antisense");
print_shortexon_splice_distances(fp,distance1,distance2);
if (shortexon->chimera_knownp_2 && splicesites_iit) {
@@ -4028,8 +4956,8 @@ Substring_print_shortexon (FILE *fp, T shortexon, bool sensep, bool invertp, Sho
shortexon->chimera_pos,/*tag*/"label_2");
}
- } else if (sensep == false && invertp == false) {
- fprintf(fp,",dir:antisense");
+ } else if (sensedir == SENSE_ANTI && invertp == false) {
+ FPRINTF(fp,",dir:antisense");
print_shortexon_splice_distances(fp,distance1,distance2);
@@ -4044,8 +4972,8 @@ Substring_print_shortexon (FILE *fp, T shortexon, bool sensep, bool invertp, Sho
shortexon->chimera_pos,/*tag*/"label_2");
}
- } else if (sensep == false && invertp == true) {
- fprintf(fp,",dir:sense");
+ } else if (sensedir == SENSE_ANTI && invertp == true) {
+ FPRINTF(fp,",dir:sense");
print_shortexon_splice_distances(fp,distance1,distance2);
if (shortexon->chimera_knownp && splicesites_iit) {
print_splicesite_labels(fp,shortexon,acceptor_typeint,
@@ -4067,16 +4995,17 @@ Substring_print_shortexon (FILE *fp, T shortexon, bool sensep, bool invertp, Sho
/* Needs to be here to access splicesites_iit */
void
-Substring_print_gmap (FILE *fp, struct Pair_T *pairs, int npairs, int nsegments, bool invertedp,
+Substring_print_gmap (Filestring_T fp, struct Pair_T *pairs, int npairs, int nsegments, bool invertedp,
Endtype_T start_endtype, Endtype_T end_endtype,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
int querylength, bool watsonp, int cdna_direction, int score,
- int insertlength, int pairscore, int mapq_score, Univ_IIT_T chromosome_iit) {
+ int insertlength, int pairscore, int mapq_score, Univ_IIT_T chromosome_iit,
+ bool pairedp, GMAP_source_T gmap_source) {
Pair_print_gsnap(fp,pairs,npairs,nsegments,invertedp,
start_endtype,end_endtype,chrnum,chroffset,chrhigh,querylength,
watsonp,cdna_direction,score,insertlength,pairscore,mapq_score,
chromosome_iit,splicesites_iit,splicesites_divint_crosstable,
- donor_typeint,acceptor_typeint);
+ donor_typeint,acceptor_typeint,pairedp,gmap_source);
return;
}
@@ -4155,7 +5084,6 @@ Substring_tally (T this, IIT_T tally_iit, int *tally_divint_crosstable) {
bool allocp;
Chrpos_T chrpos, intervalend;
- char *chr;
Chrpos_T coordstart, coordend, pos5, pos3;
int *matches;
int nmatches, i;
@@ -4253,6 +5181,8 @@ Substring_count_mismatches_region (T this, int trim_left, int trim_right,
if (this == NULL) {
return 0;
+ } else if (this->ambiguous_p == true) {
+ return this->nmismatches_whole; /* Could refine to test each left in ambcoords */
}
left_bound = trim_left;
@@ -4288,7 +5218,7 @@ Substring_count_mismatches_region (T this, int trim_left, int trim_right,
************************************************************************/
List_T
-Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
+Substring_convert_to_pairs (List_T pairs, T substring, int querylength, Shortread_T queryseq,
int hardclip_low, int hardclip_high, int queryseq_offset) {
int querystart, queryend, querypos, i;
Chrpos_T chrpos;
@@ -4299,6 +5229,9 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
return pairs;
}
+ debug6(printf("*** Entered Substring_convert_to_pairs with querylength %d, hardclip_low %d, hardclip_high %d\n",
+ querylength,hardclip_low,hardclip_high));
+
seq1 = Shortread_fullpointer_uc(queryseq);
if (substring->plusp == true) {
if (hardclip_low > substring->querystart) {
@@ -4307,14 +5240,19 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
querystart = substring->querystart;
}
- if (substring->querylength - hardclip_high < substring->queryend) {
- queryend = substring->querylength - hardclip_high;
+ if (querylength - hardclip_high < substring->queryend) {
+ queryend = querylength - hardclip_high;
} else {
queryend = substring->queryend;
}
/* Pairs are all zero-based, so do not add 1 */
+#if 0
chrpos = substring->genomicstart_adj + querystart - substring->chroffset /*+ 1U*/;
+#else
+ chrpos = substring->genomicstart + querystart - substring->chroffset /*+ 1U*/;
+#endif
+ debug6(printf("plus conversion\n"));
debug6(printf("querystart %d, queryend %d, plusp %d\n",querystart,queryend,substring->plusp));
debug6(printf("alignstart %u, alignend %u\n",substring->alignstart_trim - substring->chroffset,
substring->alignend_trim - substring->chroffset));
@@ -4322,27 +5260,34 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
if (substring->genomic_bothdiff == NULL) {
/* Exact match */
- for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++) {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos++,
+ for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++, querypos++) {
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos++,
seq1[i],/*comp*/MATCH_COMP,seq1[i]));
}
} else if (show_refdiff_p == true) {
- for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++) {
+ for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++, querypos++) {
if (isupper(genome = substring->genomic_refdiff[i])) {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos++,
+ assert(seq1[i] == genome || seq1[i] == 'N');
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos++,
seq1[i],/*comp*/MATCH_COMP,genome));
} else {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos++,
+ assert(seq1[i] != toupper(genome));
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos++,
seq1[i],/*comp*/MISMATCH_COMP,toupper(genome)));
}
}
} else {
- for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++) {
+ /* printf("querystart %d, queryend %d\n",querystart,queryend); */
+ /* printf("seq1 %s\n",seq1); */
+ /* printf("genome %s\n",substring->genomic_bothdiff); */
+ for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++, querypos++) {
if (isupper(genome = substring->genomic_bothdiff[i])) {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos++,
+ assert(seq1[i] == genome || seq1[i] == 'N');
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos++,
seq1[i],/*comp*/MATCH_COMP,genome));
} else {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos++,
+ assert(seq1[i] != toupper(genome));
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos++,
seq1[i],/*comp*/MISMATCH_COMP,toupper(genome)));
}
}
@@ -4355,14 +5300,19 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
querystart = substring->querystart;
}
- if (substring->querylength - hardclip_low < substring->queryend) {
- queryend = substring->querylength - hardclip_low;
+ if (querylength - hardclip_low < substring->queryend) {
+ queryend = querylength - hardclip_low;
} else {
queryend = substring->queryend;
}
/* For minus, to get 0-based coordinates, subtract 1 */
+#if 0
chrpos = substring->genomicstart_adj - querystart - substring->chroffset - 1U;
+#else
+ chrpos = substring->genomicstart - querystart - substring->chroffset - 1U;
+#endif
+ debug6(printf("minus conversion\n"));
debug6(printf("querystart %d, queryend %d, plusp %d\n",querystart,queryend,substring->plusp));
debug6(printf("alignstart %u, alignend %u\n",substring->alignstart_trim - substring->chroffset,
substring->alignend_trim - substring->chroffset));
@@ -4370,39 +5320,45 @@ Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
if (substring->genomic_bothdiff == NULL) {
/* Exact match */
- for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++) {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos--,
+ for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++, querypos++) {
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos--,
seq1[i],/*comp*/MATCH_COMP,seq1[i]));
}
} else if (show_refdiff_p == true) {
- for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++) {
+ for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++, querypos++) {
if (isupper(genome = substring->genomic_refdiff[i])) {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos--,
+ assert(seq1[i] == genome || seq1[i] == 'N');
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos--,
seq1[i],/*comp*/MATCH_COMP,genome));
} else {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos--,
+ assert(seq1[i] != toupper(genome));
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos--,
seq1[i],/*comp*/MISMATCH_COMP,toupper(genome)));
}
}
} else {
- for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++) {
+ for (i = querystart, querypos = queryseq_offset + querystart; i < queryend; i++, querypos++) {
if (isupper(genome = substring->genomic_bothdiff[i])) {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos--,
+ /* assert(seq1[i] == genome || seq1[i] == 'N'); */
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos--,
seq1[i],/*comp*/MATCH_COMP,genome));
} else {
- pairs = List_push_out(pairs,(void *) Pair_new_out(querypos++,/*genomepos*/chrpos--,
+ /* assert(seq1[i] != toupper(genome)); */
+ pairs = List_push_out(pairs,(void *) Pair_new_out(querypos,/*genomepos*/chrpos--,
seq1[i],/*comp*/MISMATCH_COMP,toupper(genome)));
}
}
}
}
+ debug6(Pair_dump_list(pairs,true));
return pairs;
}
List_T
-Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertionlength, Shortread_T queryseq,
+Substring_add_insertion (List_T pairs, T substringA, T substringB, int querylength,
+ int insertionlength, Shortread_T queryseq,
int hardclip_low, int hardclip_high, int queryseq_offset) {
int querystartA, queryendA, querystartB, queryendB, querypos, i;
Chrpos_T chrendA;
@@ -4416,8 +5372,8 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertion
querystartA = substringA->querystart;
}
- if (substringA->querylength - hardclip_high < substringA->queryend) {
- queryendA = substringA->querylength - hardclip_high;
+ if (querylength - hardclip_high < substringA->queryend) {
+ queryendA = querylength - hardclip_high;
} else {
queryendA = substringA->queryend;
}
@@ -4428,14 +5384,18 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertion
querystartB = substringB->querystart;
}
- if (substringB->querylength - hardclip_high < substringB->queryend) {
- queryendB = substringB->querylength - hardclip_high;
+ if (querylength - hardclip_high < substringB->queryend) {
+ queryendB = querylength - hardclip_high;
} else {
queryendB = substringB->queryend;
}
/* Pairs are all zero-based, so do not add 1 */
+#if 0
chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/;
+#else
+ chrendA = substringA->genomicstart + queryendA - substringA->chroffset /*+ 1U*/;
+#endif
} else {
if (hardclip_high > substringA->querystart) {
@@ -4444,8 +5404,8 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertion
querystartA = substringA->querystart;
}
- if (substringA->querylength - hardclip_low < substringA->queryend) {
- queryendA = substringA->querylength - hardclip_low;
+ if (querylength - hardclip_low < substringA->queryend) {
+ queryendA = querylength - hardclip_low;
} else {
queryendA = substringA->queryend;
}
@@ -4456,14 +5416,18 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertion
querystartB = substringB->querystart;
}
- if (substringB->querylength - hardclip_low < substringB->queryend) {
- queryendB = substringB->querylength - hardclip_low;
+ if (querylength - hardclip_low < substringB->queryend) {
+ queryendB = querylength - hardclip_low;
} else {
queryendB = substringB->queryend;
}
/* Pairs are all zero-based, so subtract 1 */
+#if 0
chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U;
+#else
+ chrendA = substringA->genomicstart - queryendA - substringA->chroffset - 1U;
+#endif
}
if (querystartA <= queryendA && querystartB <= queryendB) {
@@ -4481,7 +5445,8 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertion
List_T
-Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion, int deletionlength,
+Substring_add_deletion (List_T pairs, T substringA, T substringB, int querylength,
+ char *deletion, int deletionlength,
int hardclip_low, int hardclip_high, int queryseq_offset) {
int querystartA, queryendA, querystartB, queryendB, querypos, k;
Chrpos_T chrendA;
@@ -4493,8 +5458,8 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion
querystartA = substringA->querystart;
}
- if (substringA->querylength - hardclip_high < substringA->queryend) {
- queryendA = substringA->querylength - hardclip_high;
+ if (querylength - hardclip_high < substringA->queryend) {
+ queryendA = querylength - hardclip_high;
} else {
queryendA = substringA->queryend;
}
@@ -4505,14 +5470,18 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion
querystartB = substringB->querystart;
}
- if (substringB->querylength - hardclip_high < substringB->queryend) {
- queryendB = substringB->querylength - hardclip_high;
+ if (querylength - hardclip_high < substringB->queryend) {
+ queryendB = querylength - hardclip_high;
} else {
queryendB = substringB->queryend;
}
/* Pairs are all zero-based, so do not add 1 */
+#if 0
chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/;
+#else
+ chrendA = substringA->genomicstart + queryendA - substringA->chroffset /*+ 1U*/;
+#endif
if (querystartA < queryendA && querystartB < queryendB) {
querypos = queryendA + queryseq_offset;
@@ -4529,8 +5498,8 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion
querystartA = substringA->querystart;
}
- if (substringA->querylength - hardclip_low < substringA->queryend) {
- queryendA = substringA->querylength - hardclip_low;
+ if (querylength - hardclip_low < substringA->queryend) {
+ queryendA = querylength - hardclip_low;
} else {
queryendA = substringA->queryend;
}
@@ -4541,14 +5510,18 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion
querystartB = substringB->querystart;
}
- if (substringB->querylength - hardclip_low < substringB->queryend) {
- queryendB = substringB->querylength - hardclip_low;
+ if (querylength - hardclip_low < substringB->queryend) {
+ queryendB = querylength - hardclip_low;
} else {
queryendB = substringB->queryend;
}
/* Pairs are all zero-based, so subtract 1 */
+#if 0
chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U;
+#else
+ chrendA = substringA->genomicstart - queryendA - substringA->chroffset - 1U;
+#endif
if (querystartA <= queryendA && querystartB <= queryendB) {
querypos = queryendA + queryseq_offset;
@@ -4565,7 +5538,7 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion
List_T
-Substring_add_intron (List_T pairs, T substringA, T substringB,
+Substring_add_intron (List_T pairs, T substringA, T substringB, int querylength,
int hardclip_low, int hardclip_high, int queryseq_offset) {
int querystartA, queryendA, querystartB, queryendB, querypos;
Chrpos_T chrendA;
@@ -4577,8 +5550,8 @@ Substring_add_intron (List_T pairs, T substringA, T substringB,
querystartA = substringA->querystart;
}
- if (substringA->querylength - hardclip_high < substringA->queryend) {
- queryendA = substringA->querylength - hardclip_high;
+ if (querylength - hardclip_high < substringA->queryend) {
+ queryendA = querylength - hardclip_high;
} else {
queryendA = substringA->queryend;
}
@@ -4589,14 +5562,18 @@ Substring_add_intron (List_T pairs, T substringA, T substringB,
querystartB = substringB->querystart;
}
- if (substringB->querylength - hardclip_high < substringB->queryend) {
- queryendB = substringB->querylength - hardclip_high;
+ if (querylength - hardclip_high < substringB->queryend) {
+ queryendB = querylength - hardclip_high;
} else {
queryendB = substringB->queryend;
}
/* Pairs are all zero-based, so do not add 1 */
+#if 0
chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/;
+#else
+ chrendA = substringA->genomicstart + queryendA - substringA->chroffset /*+ 1U*/;
+#endif
} else {
if (hardclip_high > substringA->querystart) {
@@ -4605,8 +5582,8 @@ Substring_add_intron (List_T pairs, T substringA, T substringB,
querystartA = substringA->querystart;
}
- if (substringA->querylength - hardclip_low < substringA->queryend) {
- queryendA = substringA->querylength - hardclip_low;
+ if (querylength - hardclip_low < substringA->queryend) {
+ queryendA = querylength - hardclip_low;
} else {
queryendA = substringA->queryend;
}
@@ -4617,14 +5594,18 @@ Substring_add_intron (List_T pairs, T substringA, T substringB,
querystartB = substringB->querystart;
}
- if (substringB->querylength - hardclip_low < substringB->queryend) {
- queryendB = substringB->querylength - hardclip_low;
+ if (querylength - hardclip_low < substringB->queryend) {
+ queryendB = querylength - hardclip_low;
} else {
queryendB = substringB->queryend;
}
/* Pairs are all zero-based, so subtract 1 */
+#if 0
chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U;
+#else
+ chrendA = substringA->genomicstart - queryendA - substringA->chroffset - 1U;
+#endif
}
if (querystartA <= queryendA && querystartB <= queryendB) {
diff --git a/src/substring.h b/src/substring.h
index 51e69f9..e4a12a7 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,4 +1,4 @@
-/* $Id: substring.h 161653 2015-03-22 16:10:53Z twu $ */
+/* $Id: substring.h 166827 2015-06-03 06:55:46Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -14,8 +14,19 @@
#include "iit-read.h"
#include "bool.h"
#include "pairdef.h"
+#include "filestring.h"
+#include "junction.h"
+#include "intlist.h"
+#include "doublelist.h"
+#ifdef LARGE_GENOMES
+#include "uint8list.h"
+#else
+#include "uintlist.h"
+#endif
-typedef enum {END, INS, DEL, DON, ACC, AMB_DON, AMB_ACC, TERM} Endtype_T;
+
+typedef enum {GMAP_NOT_APPLICABLE, GMAP_VIA_SUBSTRINGS, GMAP_VIA_SEGMENTS, GMAP_VIA_REGION} GMAP_source_T;
+typedef enum {END, INS, DEL, FRAG, DON, ACC, AMB_DON, AMB_ACC, TERM} Endtype_T;
extern char *
Endtype_string (Endtype_T endtype);
@@ -40,22 +51,39 @@ Substring_unalias_circular (T this);
extern T
Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
- Univcoord_T chrhigh, Chrpos_T chrlength, Univcoord_T left,
- Univcoord_T genomicstart, Univcoord_T genomicend,
- Univcoord_T genomicstart_adj, Univcoord_T genomicend_adj,
+ Univcoord_T chrhigh, Chrpos_T chrlength,
Compress_T query_compress, Endtype_T start_endtype, Endtype_T end_endtype,
int querystart, int queryend, int querylength,
Univcoord_T alignstart, Univcoord_T alignend, int genomiclength,
- int extraleft, int extraright, bool exactp,
- bool plusp, int genestrand, bool first_read_p,
- bool trim_left_p, bool trim_right_p, int minlength);
+ bool exactp, bool plusp, int genestrand, bool first_read_p,
+ bool trim_left_p, bool trim_right_p, int outofbounds_start, int outofbounds_end,
+ int minlength);
+
+extern T
+Substring_new_ambig (int querystart, int queryend, int splice_pos, int querylength,
+ Chrnum_T chrnum, Univcoord_T chroffset,
+ Univcoord_T chrhigh, Chrpos_T chrlength,
+ int genomiclength, bool plusp, int genestrand, bool first_read_p,
+#ifdef LARGE_GENOMES
+ Uint8list_T ambcoords,
+#else
+ Uintlist_T ambcoords,
+#endif
+ Intlist_T amb_knowni, Intlist_T amb_nmismatches, Doublelist_T amb_probs,
+ double amb_common_prob, bool amb_donor_common_p, bool substring1p);
+
+extern Univcoord_T
+Substring_set_unambiguous (double *donor_prob, double *acceptor_prob, Univcoord_T *genomicstart, Univcoord_T *genomicend,
+ T this, int bingoi);
extern float
Substring_compute_mapq (T this, Compress_T query_compress, char *quality_string, bool trim_terminals_p);
extern int
-Substring_display_prep (char **deletion, T this, char *query, Compress_T query_compress_fwd, Compress_T query_compress_rev,
- Genome_T genome, int deletion_pos, int deletion_length);
+Substring_display_prep (T this, char *queryuc_ptr, int querylength,
+ int extraleft, int extraright,
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ Genome_T genome);
extern bool
Substring_bad_stretch_p (T this, Compress_T query_compress_fwd, Compress_T query_compress_rev);
@@ -79,6 +107,8 @@ extern Univcoord_T
Substring_overlap_segment_trimmed (T substring1, T substring2);
extern Univcoord_T
+Substring_left (T this);
+extern Univcoord_T
Substring_splicecoord (T this);
extern int
Substring_splicesites_knowni (T this);
@@ -140,6 +170,8 @@ Substring_querylength (T this);
extern int
Substring_match_length (T this);
extern int
+Substring_match_length_amb (T this);
+extern int
Substring_match_length_orig (T this);
extern Chrpos_T
Substring_genomic_alignment_length (T this);
@@ -156,25 +188,42 @@ extern Univcoord_T
Substring_alignstart (T this);
extern Univcoord_T
Substring_alignend (T this);
+extern Chrpos_T
+Substring_alignstart_chr (T this);
+extern Chrpos_T
+Substring_alignend_chr (T this);
extern Univcoord_T
Substring_alignstart_trim (T this);
extern Univcoord_T
Substring_alignend_trim (T this);
+extern Chrpos_T
+Substring_alignstart_trim_chr (T this);
+extern Chrpos_T
+Substring_alignend_trim_chr (T this);
extern Univcoord_T
Substring_left_genomicseg (T this);
extern Univcoord_T
Substring_genomicstart (T this);
+extern Chrpos_T
+Substring_genomicstart_chr (T this);
extern Univcoord_T
Substring_genomicstart_adj (T this);
extern Univcoord_T
Substring_genomicend (T this);
extern Chrpos_T
+Substring_genomicend_chr (T this);
+extern Chrpos_T
Substring_genomiclength (T this);
-extern Chrpos_T
-Substring_alignstart_chr (T this);
-extern Chrpos_T
-Substring_alignend_chr (T this);
+extern double
+Substring_amb_donor_prob (T this);
+extern double
+Substring_amb_acceptor_prob (T this);
+
+extern double
+Substring_siteA_prob (T this);
+extern double
+Substring_siteD_prob (T this);
extern double
Substring_chimera_prob (T this);
@@ -186,16 +235,22 @@ extern int
Substring_chimera_pos_A (T this);
extern int
Substring_chimera_pos_D (T this);
-extern bool
-Substring_chimera_knownp (T this);
-extern int
-Substring_nchimera_known (T this);
-extern int
-Substring_nchimera_novel (T this);
extern int
Substring_chimera_sensedir (T this);
+
extern bool
-Substring_chimera_sensep (T this);
+Substring_ambiguous_p (T this);
+extern int
+Substring_nambcoords (T this);
+extern Univcoord_T *
+Substring_ambcoords (T this);
+extern int *
+Substring_amb_knowni (T this);
+extern int *
+Substring_amb_nmismatches (T this);
+extern double *
+Substring_amb_probs (T this);
+
extern int
Substring_circularpos (T this);
@@ -204,21 +259,32 @@ extern T
Substring_copy (T old);
extern T
+Substring_new_startfrag (Univcoord_T startfrag_coord, int splice_pos, int nmismatches,
+ Univcoord_T left, Compress_T query_compress,
+ int querylength, bool plusp, int genestrand, bool first_read_p,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
+extern T
+Substring_new_endfrag (Univcoord_T endfrag_coord, int splice_pos, int nmismatches,
+ Univcoord_T left, Compress_T query_compress,
+ int querylength, bool plusp, int genestrand, bool first_read_p,
+ Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
+
+extern T
Substring_new_donor (Univcoord_T donor_coord, int donor_knowni, int donor_pos, int donor_nmismatches,
double donor_prob, Univcoord_T left, Compress_T query_compress,
- int querylength, bool plusp, int genestrand, bool first_read_p, bool sensep,
+ int querylength, bool plusp, int genestrand, bool first_read_p, int sensedir,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
extern T
Substring_new_acceptor (Univcoord_T acceptor_coord, int acceptor_knowni, int acceptor_pos, int acceptor_nmismatches,
double acceptor_prob, Univcoord_T left, Compress_T query_compress,
- int querylength, bool plusp, int genestrand, bool first_read_p, bool sensep,
+ int querylength, bool plusp, int genestrand, bool first_read_p, int sensedir,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
extern T
Substring_new_shortexon (Univcoord_T acceptor_coord, int acceptor_knowni, Univcoord_T donor_coord, int donor_knowni,
int acceptor_pos, int donor_pos, int nmismatches,
double acceptor_prob, double donor_prob, Univcoord_T left,
Compress_T query_compress, int querylength,
- bool plusp, int genestrand, bool first_read_p, bool sensep,
+ bool plusp, int genestrand, bool first_read_p, int sensedir,
bool acceptor_ambp, bool donor_ambp,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Chrpos_T chrlength);
@@ -226,41 +292,23 @@ extern List_T
Substring_sort_chimera_halves (List_T hitlist, bool ascendingp);
+extern Chrpos_T
+Substring_compute_chrpos (T this, int hardclip_low, bool hide_soft_clips_p);
+
extern void
-Substring_print_m8 (FILE *fp, T substring, Shortread_T headerseq, char *acc_suffix,
+Substring_print_m8 (Filestring_T fp, T substring, Shortread_T headerseq, char *acc_suffix,
char *chr, bool invertp);
extern void
-Substring_print_single (FILE *fp, T substring, Shortread_T queryseq,
- char *chr, bool invertp);
-extern void
-Substring_print_insertion_1 (FILE *fp, T substring1, T substring2, int nindels,
- Shortread_T queryseq, char *chr, bool invertp);
-extern void
-Substring_print_insertion_2 (FILE *fp, T substring1, T substring2, int nindels,
- Shortread_T queryseq, char *chr, bool invertp);
-extern void
-Substring_print_deletion_1 (FILE *fp, T substring1, T substring2, int nindels,
- char *deletion, Shortread_T queryseq, char *chr,
- bool invertp);
-extern void
-Substring_print_deletion_2 (FILE *fp, T substring1, T substring2, int nindels,
- Shortread_T queryseq, char *chr, bool invertp);
-extern void
-Substring_print_donor (FILE *fp, T donor, bool sensep, bool invertp, Shortread_T queryseq,
- Univ_IIT_T chromosome_iit, T acceptor, Chrpos_T chimera_distance);
-extern void
-Substring_print_acceptor (FILE *fp, T acceptor, bool sensep, bool invertp, Shortread_T queryseq,
- Univ_IIT_T chromosome_iit, T donor, Chrpos_T chimera_distance);
-extern void
-Substring_print_shortexon (FILE *fp, T shortexon, bool sensep, bool invertp, Shortread_T queryseq,
- Univ_IIT_T chromosome_iit, Chrpos_T distance1, Chrpos_T distance2);
+Substring_print_alignment (Filestring_T fp, Junction_T pre_junction, T substring, Junction_T post_junction,
+ Shortread_T queryseq, Genome_T genome, char *chr, bool invertp);
extern void
-Substring_print_gmap (FILE *fp, struct Pair_T *pairs, int npairs, int nsegments, bool invertedp,
+Substring_print_gmap (Filestring_T fp, struct Pair_T *pairs, int npairs, int nsegments, bool invertedp,
Endtype_T start_endtype, Endtype_T end_endtype,
Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
int querylength, bool watsonp, int cdna_direction, int score,
- int insertlength, int pairscore, int mapq_score, Univ_IIT_T chromosome_iit);
+ int insertlength, int pairscore, int mapq_score, Univ_IIT_T chromosome_iit,
+ bool pairedp, GMAP_source_T gmap_source);
extern bool
Substring_contains_known_splicesite (T this);
@@ -275,37 +323,23 @@ extern bool
Substring_runlength_p (T this, IIT_T runlength_iit, int *runlength_divint_crosstable);
-#ifdef USE_OLD_MAXENT
-extern void
-Substring_assign_donor_prob (T donor, Genome_T genome, Univ_IIT_T chromosome_iit);
-extern void
-Substring_assign_acceptor_prob (T acceptor, Genome_T genome, Univ_IIT_T chromosome_iit);
-extern void
-Substring_assign_shortexon_prob (T shortexon, Genome_T genome, Univ_IIT_T chromosome_iit);
-#else
-extern void
-Substring_assign_donor_prob (T donor);
-extern void
-Substring_assign_acceptor_prob (T acceptor);
-extern void
-Substring_assign_shortexon_prob (T shortexon);
-#endif
-
extern int
Substring_count_mismatches_region (T this, int trim_left, int trim_right,
Compress_T query_compress_fwd, Compress_T query_compress_rev);
extern List_T
-Substring_convert_to_pairs (List_T pairs, T substring, Shortread_T queryseq,
+Substring_convert_to_pairs (List_T pairs, T substring, int querylength, Shortread_T queryseq,
int hardclip_low, int hardclip_high, int queryseq_offset);
extern List_T
-Substring_add_insertion (List_T pairs, T substringA, T substringB, int insertionlength, Shortread_T queryseq,
+Substring_add_insertion (List_T pairs, T substringA, T substringB, int querylength,
+ int insertionlength, Shortread_T queryseq,
int hardclip_low, int hardclip_high, int queryseq_offset);
extern List_T
-Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion, int deletionlength,
+Substring_add_deletion (List_T pairs, T substringA, T substringB, int querylength,
+ char *deletion, int deletionlength,
int hardclip_low, int hardclip_high, int queryseq_offset);
extern List_T
-Substring_add_intron (List_T pairs, T substringA, T substringB,
+Substring_add_intron (List_T pairs, T substringA, T substringB, int querylength,
int hardclip_low, int hardclip_high, int queryseq_offset);
#undef T
diff --git a/src/tableuint8.h b/src/tableuint8.h
index 63ce2f8..a32e4f0 100644
--- a/src/tableuint8.h
+++ b/src/tableuint8.h
@@ -1,6 +1,7 @@
-/* $Id: tableuint8.h 99737 2013-06-27 19:33:03Z twu $ */
+/* $Id: tableuint8.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef TABLEUINT8_INCLUDED
#define TABLEUINT8_INCLUDED
+
#include "types.h"
#define T Tableuint8_T
diff --git a/src/translation.c b/src/translation.c
index 769773e..b7586a8 100644
--- a/src/translation.c
+++ b/src/translation.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: translation.c 130296 2014-03-17 17:16:05Z twu $";
+static char rcsid[] = "$Id: translation.c 155282 2014-12-12 19:42:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2094,7 +2094,7 @@ fill_aa_rev (int *strlen_g, int *strlen_c, int *netchars, char *aa_genomicseg, c
static void
-print_mutation (FILE *fp, bool *printp, int aapos, int strlen_g, int strlen_c, int refquerypos,
+print_mutation (Filestring_T fp, bool *printp, int aapos, int strlen_g, int strlen_c, int refquerypos,
char *aa_genomicseg, char *aa_queryseq, char *nt_genomicseg, char *nt_queryseq) {
bool print_refquerypos_p = true;
#if 0
@@ -2102,24 +2102,24 @@ print_mutation (FILE *fp, bool *printp, int aapos, int strlen_g, int strlen_c, i
#endif
if (strlen_g > strlen_c) {
- if (*printp == true) fprintf(fp,", "); else *printp = true;
+ if (*printp == true) FPRINTF(fp,", "); else *printp = true;
if (aa_genomicseg[0] == aa_queryseq[0]) {
- fprintf(fp,"del%s%d%s ",&(aa_genomicseg[1]),aapos+1,&(aa_queryseq[1]));
+ FPRINTF(fp,"del%s%d%s ",&(aa_genomicseg[1]),aapos+1,&(aa_queryseq[1]));
refquerypos += 3;
} else {
- fprintf(fp,"del%s%d%s ",aa_genomicseg,aapos,aa_queryseq);
+ FPRINTF(fp,"del%s%d%s ",aa_genomicseg,aapos,aa_queryseq);
}
} else if (strlen_g < strlen_c) {
- if (*printp == true) fprintf(fp,", "); else *printp = true;
+ if (*printp == true) FPRINTF(fp,", "); else *printp = true;
if (strlen_c - strlen_g > 4) {
- fprintf(fp,"ins%d+%daa ",aapos,strlen_c-strlen_g);
+ FPRINTF(fp,"ins%d+%daa ",aapos,strlen_c-strlen_g);
#if 0
print_nt_p = false;
#endif
} else if (aa_genomicseg[0] == aa_queryseq[0]) {
- fprintf(fp,"ins%s%d%s ",&(aa_genomicseg[1]),aapos,&(aa_queryseq[1]));
+ FPRINTF(fp,"ins%s%d%s ",&(aa_genomicseg[1]),aapos,&(aa_queryseq[1]));
} else {
- fprintf(fp,"ins%s%d%s ",aa_genomicseg,aapos,aa_queryseq);
+ FPRINTF(fp,"ins%s%d%s ",aa_genomicseg,aapos,aa_queryseq);
}
} else if (aa_genomicseg[0] == 'X' || aa_queryseq[0] == 'X') {
#if 0
@@ -2127,19 +2127,19 @@ print_mutation (FILE *fp, bool *printp, int aapos, int strlen_g, int strlen_c, i
#endif
print_refquerypos_p = false;
} else {
- if (*printp == true) fprintf(fp,", "); else *printp = true;
- fprintf(fp,"%s%d%s ",aa_genomicseg,aapos,aa_queryseq);
+ if (*printp == true) FPRINTF(fp,", "); else *printp = true;
+ FPRINTF(fp,"%s%d%s ",aa_genomicseg,aapos,aa_queryseq);
}
#if 0
if (print_nt_p == true) {
- fprintf(fp,"(%s>%s) ",nt_genomicseg,nt_queryseq);
+ FPRINTF(fp,"(%s>%s) ",nt_genomicseg,nt_queryseq);
}
#endif
if (print_refquerypos_p == true) {
#ifdef PMAP
- fprintf(fp,"[%d]",refquerypos+2);
+ FPRINTF(fp,"[%d]",refquerypos+2);
#else
- fprintf(fp,"[%d]",refquerypos);
+ FPRINTF(fp,"[%d]",refquerypos);
#endif
}
@@ -2147,25 +2147,25 @@ print_mutation (FILE *fp, bool *printp, int aapos, int strlen_g, int strlen_c, i
}
static void
-print_large_deletion (FILE *fp, bool *printp, int lastaapos, int nextaapos, int refquerypos) {
+print_large_deletion (Filestring_T fp, bool *printp, int lastaapos, int nextaapos, int refquerypos) {
- if (*printp == true) fprintf(fp,", "); else *printp = true;
- fprintf(fp,"del%d-%daa ",lastaapos+1,nextaapos-lastaapos-1);
- fprintf(fp,"[%d]",refquerypos+3);
+ if (*printp == true) FPRINTF(fp,", "); else *printp = true;
+ FPRINTF(fp,"del%d-%daa ",lastaapos+1,nextaapos-lastaapos-1);
+ FPRINTF(fp,"[%d]",refquerypos+3);
return;
}
void
-Translation_print_comparison (FILE *fp, struct Pair_T *pairs, int npairs, struct Pair_T *refpairs, int nrefpairs,
+Translation_print_comparison (Filestring_T fp, struct Pair_T *pairs, int npairs, struct Pair_T *refpairs, int nrefpairs,
int cdna_direction, int relaastart, int relaaend, int maxmutations) {
int i, j;
int aapos, strlen_g, strlen_c, netchars;
bool printp = false;
char nt_genomicseg[MAXMUT], aa_genomicseg[MAXMUT], nt_queryseq[MAXMUT], aa_queryseq[MAXMUT];
- fprintf(fp," Amino acid changes: ");
+ FPRINTF(fp," Amino acid changes: ");
if (relaastart < relaaend) {
if ((aapos = pairs[i=0].aapos) == 0) {
@@ -2214,7 +2214,7 @@ Translation_print_comparison (FILE *fp, struct Pair_T *pairs, int npairs, struct
}
}
- fprintf(fp,"\n");
+ FPRINTF(fp,"\n");
return;
}
diff --git a/src/translation.h b/src/translation.h
index 7cd4e9a..215190f 100644
--- a/src/translation.h
+++ b/src/translation.h
@@ -1,9 +1,12 @@
-/* $Id: translation.h 40271 2011-05-28 02:29:18Z twu $ */
+/* $Id: translation.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef TRANSLATION_INCLUDED
#define TRANSLATION_INCLUDED
+
#include <stdio.h>
#include "bool.h"
#include "pair.h"
+#include "filestring.h"
+
#define T Translation_T
typedef struct T *T;
@@ -30,7 +33,7 @@ Translation_via_reference (int *relaastart, int *relaaend,
struct Pair_T *refpairs, int nrefpairs, bool refwatsonp, bool fixshiftp);
extern void
-Translation_print_comparison (FILE *fp, struct Pair_T *pairs, int npairs, struct Pair_T *refpairs, int nrefpairs,
+Translation_print_comparison (Filestring_T fp, struct Pair_T *pairs, int npairs, struct Pair_T *refpairs, int nrefpairs,
int cdna_direction, int relaastart, int relaaend, int maxmutations);
#undef T
diff --git a/src/types.h b/src/types.h
index 22b5df3..cb02ba0 100644
--- a/src/types.h
+++ b/src/types.h
@@ -1,8 +1,8 @@
-/* $Id: types.h 148721 2014-09-24 00:45:45Z twu $ */
+/* $Id: types.h 157223 2015-01-22 18:43:01Z twu $ */
#ifndef TYPES_INCLUDED
#define TYPES_INCLUDED
#ifdef HAVE_CONFIG_H
-#include <config.h>
+#include <config.h> /* For SIZEOF_UNSIGNED_LONG_LONG, SIZEOF_UNSIGNED_LONG, HAVE_64_BIT */
#endif
/* Number of bits, such as index1part or basesize. Need to allow for negative values. */
@@ -34,15 +34,18 @@ typedef UINT4 Genomecomp_T;
#define MAXIMUM_KMER 16
typedef unsigned long long UINT8;
typedef unsigned long long Oligospace_T;
+
#elif (SIZEOF_UNSIGNED_LONG == 8)
#define HAVE_64_BIT 1
#define MAXIMUM_KMER 16
typedef unsigned long UINT8;
typedef unsigned long Oligospace_T;
+
#else
#define MAXIMUM_KMER 15
#define OLIGOSPACE_NOT_LONG
typedef unsigned int Oligospace_T;
+
#endif
/* Contents of compressed offsets file. Storing as UINT4, even for
@@ -84,7 +87,7 @@ typedef UINT4 Positionsptr_T;
#endif
-/* For definition of Univcoord_T and Chrpos_T, see genomicpos.h */
+/* For definition of Chrpos_T, see genomicpos.h */
/* For intervals and IIT files */
#ifdef HAVE_64_BIT
@@ -93,20 +96,24 @@ typedef UINT4 Positionsptr_T;
#include "uint8list.h"
typedef UINT8 Univcoord_T;
typedef Uint8list_T Univcoordlist_T;
+
#elif defined(LARGE_GENOMES)
#include "uint8list.h"
typedef UINT8 Univcoord_T;
typedef Uint8list_T Univcoordlist_T;
+
#else
#include "uintlist.h"
typedef UINT4 Univcoord_T;
typedef Uintlist_T Univcoordlist_T;
+
#endif
#else
#include "uintlist.h"
typedef UINT4 Univcoord_T;
typedef Uintlist_T Univcoordlist_T;
+
#endif
/* For splicetrie */
diff --git a/src/uint8list.c b/src/uint8list.c
index bc3af15..6559466 100644
--- a/src/uint8list.c
+++ b/src/uint8list.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uint8list.c 145990 2014-08-25 21:47:32Z twu $";
+static char rcsid[] = "$Id: uint8list.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -90,6 +90,41 @@ Uint8list_length (T list) {
return n;
}
+UINT8
+Uint8list_max (T list) {
+ UINT8 m = 0;
+
+ while (list) {
+ if (list->first > m) {
+ m = list->first;
+ }
+ list = list->rest;
+ }
+
+ return m;
+}
+
+UINT8
+Uint8list_min (T list) {
+ UINT8 m;
+
+ if (list == NULL) {
+ return 0;
+
+ } else {
+ m = list->first;
+ list = list->rest;
+ while (list) {
+ if (list->first < m) {
+ m = list->first;
+ }
+ list = list->rest;
+ }
+
+ return m;
+ }
+}
+
UINT8 *
Uint8list_to_array (int *n, T list) {
UINT8 *array;
diff --git a/src/uint8list.h b/src/uint8list.h
index 587a0f8..b149bd3 100644
--- a/src/uint8list.h
+++ b/src/uint8list.h
@@ -1,4 +1,4 @@
-/* $Id: uint8list.h 148721 2014-09-24 00:45:45Z twu $ */
+/* $Id: uint8list.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef UINT8LIST_INCLUDED
#define UINT8LIST_INCLUDED
@@ -25,6 +25,10 @@ extern T
Uint8list_reverse (T list);
extern int
Uint8list_length (T list);
+extern UINT8
+Uint8list_max (T list);
+extern UINT8
+Uint8list_min (T list);
extern UINT8 *
Uint8list_to_array (int *n, T list);
extern UINT8 *
diff --git a/src/uintlist.c b/src/uintlist.c
index 1fe4137..4fb0ccc 100644
--- a/src/uintlist.c
+++ b/src/uintlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uintlist.c 145990 2014-08-25 21:47:32Z twu $";
+static char rcsid[] = "$Id: uintlist.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -92,6 +92,41 @@ Uintlist_length (T list) {
return n;
}
+UINT4
+Uintlist_max (T list) {
+ UINT4 m = 0;
+
+ while (list) {
+ if (list->first > m) {
+ m = list->first;
+ }
+ list = list->rest;
+ }
+
+ return m;
+}
+
+UINT4
+Uintlist_min (T list) {
+ UINT4 m;
+
+ if (list == NULL) {
+ return 0;
+
+ } else {
+ m = list->first;
+ list = list->rest;
+ while (list) {
+ if (list->first < m) {
+ m = list->first;
+ }
+ list = list->rest;
+ }
+
+ return m;
+ }
+}
+
UINT4 *
Uintlist_to_array (int *n, T list) {
UINT4 *array;
diff --git a/src/uintlist.h b/src/uintlist.h
index 7948fcd..773fd56 100644
--- a/src/uintlist.h
+++ b/src/uintlist.h
@@ -1,4 +1,4 @@
-/* $Id: uintlist.h 148721 2014-09-24 00:45:45Z twu $ */
+/* $Id: uintlist.h 166641 2015-05-29 21:13:04Z twu $ */
#ifndef UINTLIST_INCLUDED
#define UINTLIST_INCLUDED
@@ -25,6 +25,10 @@ extern T
Uintlist_reverse (T list);
extern int
Uintlist_length (T list);
+extern UINT4
+Uintlist_max (T list);
+extern UINT4
+Uintlist_min (T list);
extern UINT4 *
Uintlist_to_array (int *n, T list);
extern void
diff --git a/src/uniqscan.c b/src/uniqscan.c
index 4a4e39b..79b0470 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 158355 2015-02-10 19:08:45Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -94,6 +94,21 @@ static int gmap_allowance = 3;
static int min_intronlength = 9;
static int max_deletionlength = 50;
+static int sufflookback = 60;
+static int nsufflookback = 5;
+static int maxintronlen = 200000; /* Was used previously in stage 1. Now used only in stage 2 and Stage3_mergeable. */
+
+static int extraband_single = 6; /* This is in addition to length2 -
+ length1. If onesidegap is true in
+ dynprog.c, then this is equivalent
+ to extraband_single of 0. Needs
+ to be > 0 to handle default
+ close_indels_mode. */
+static int extraband_end = 6; /* Was 6. Shouldn't differ from 0, since onesidegapp is true?
+ This is only on both sides of main diagonal */
+static int extraband_paired = 14; /* This is in addition to length2 - length1 */
+static int ngap = 3;
+
/************************************************************************
* Global parameters
@@ -103,6 +118,7 @@ static Univ_IIT_T chromosome_iit = NULL;
static int circular_typeint = -1;
static int nchromosomes = 0;
static bool *circularp = NULL;
+static bool any_circular_p;
static Indexdb_T indexdb = NULL;
static Indexdb_T indexdb2 = NULL; /* For cmet or atoi */
static Genome_T genome = NULL;
@@ -410,10 +426,6 @@ uniqueness_scan (bool from_right_p) {
Diagpool_T diagpool;
Cellpool_T cellpool;
-#ifdef MEMUSAGE
- long int memusage_constant = 0;
-#endif
-
oligoindices_major = Oligoindex_array_new_major(MAX_QUERYLENGTH_FOR_ALLOC,MAX_GENOMICLENGTH_FOR_ALLOC);
oligoindices_minor = Oligoindex_array_new_minor(MAX_QUERYLENGTH_FOR_ALLOC,MAX_GENOMICLENGTH_FOR_ALLOC);
dynprogL = Dynprog_new(nullgap,EXTRAQUERYGAP,maxpeelback,extramaterial_end,extramaterial_paired,
@@ -429,11 +441,6 @@ uniqueness_scan (bool from_right_p) {
floors_array = (Floors_T *) CALLOC(MAX_READLENGTH+1,sizeof(Floors_T));
/* Except_stack_create(); -- requires pthreads */
-#ifdef MEMUSAGE
- memusage_constant += Mem_usage_report();
- Mem_usage_reset(0);
-#endif
-
for (i = 0; i < 10; i++) {
sprintf(&(digit[i]),"%d",i);
}
@@ -451,7 +458,7 @@ uniqueness_scan (bool from_right_p) {
/*barcode_length*/0,/*invertp*/0,/*copy_acc_p*/false,/*skipp*/false);
stage3array = Stage1_single_read(&npaths,&first_absmq,&second_absmq,
queryseq1,indexdb,indexdb2,indexdb_size_threshold,
- genome,floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,/*distantsplicing_penalty*/100,min_shortend,
@@ -495,7 +502,7 @@ uniqueness_scan (bool from_right_p) {
/*barcode_length*/0,/*invertp*/0,/*copy_acc_p*/false,/*skipp*/false);
stage3array = Stage1_single_read(&npaths,&first_absmq,&second_absmq,
queryseq1,indexdb,indexdb2,indexdb_size_threshold,
- genome,floors_array,user_maxlevel_float,
+ floors_array,user_maxlevel_float,
indel_penalty_middle,indel_penalty_end,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
localsplicing_penalty,/*distantsplicing_penalty*/100,min_shortend,
@@ -744,12 +751,6 @@ main (int argc, char *argv[]) {
const char *long_name;
char *string;
-#ifdef MEMUSAGE
- Mem_usage_init();
- Mem_usage_set_threadname("main");
-#endif
-
-
while ((opt = getopt_long(argc,argv,
"D:d:k:q:GN:M:m:i:y:Y:z:Z:w:e:l:g:S:s:V:v:53",
long_options, &long_option_index)) != -1) {
@@ -1004,14 +1005,14 @@ main (int argc, char *argv[]) {
} else {
nchromosomes = Univ_IIT_total_nintervals(chromosome_iit);
circular_typeint = Univ_IIT_typeint(chromosome_iit,"circular");
- circularp = Univ_IIT_circularp(chromosome_iit);
+ circularp = Univ_IIT_circularp(&any_circular_p,chromosome_iit);
}
FREE(iitfile);
if (snps_root == NULL) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,/*sharedp*/false);
if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
if (user_cmetdir == NULL) {
modedir = genomesubdir;
@@ -1022,7 +1023,8 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metct",/*snps_root*/NULL,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -1030,7 +1032,8 @@ main (int argc, char *argv[]) {
if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metga",/*snps_root*/NULL,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -1045,7 +1048,8 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1053,7 +1057,8 @@ main (int argc, char *argv[]) {
if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1064,7 +1069,8 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP\n",fileroot,IDX_FILESUFFIX);
exit(9);
}
@@ -1082,9 +1088,9 @@ main (int argc, char *argv[]) {
/* SNPs */
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,/*sharedp*/false);
genomealt = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
- uncompressedp,genome_access);
+ uncompressedp,genome_access,/*sharedp*/false);
if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
if (user_cmetdir == NULL) {
@@ -1096,14 +1102,16 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metct",snps_root,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"metga",snps_root,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -1118,14 +1126,16 @@ main (int argc, char *argv[]) {
if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access)) == NULL) {
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -1134,7 +1144,8 @@ main (int argc, char *argv[]) {
indexdb = Indexdb_new_genome(&index1part,&index1interval,
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
required_index1part,required_interval,
- expand_offsets_p,offsetsstrm_access,positions_access);
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false);
if (indexdb == NULL) {
fprintf(stderr,"Cannot find snps index file for %s in directory %s\n",snps_root,snpsdir);
exit(9);
@@ -1197,9 +1208,9 @@ main (int argc, char *argv[]) {
Indel_setup(min_indel_end_matches,indel_penalty_middle);
Stage1hr_setup(/*use_sarray_p*/false,/*use_only_sarray_p*/false,index1part,index1interval,
spansize,chromosome_iit,nchromosomes,
- genomealt,mode,/*maxpaths_search*/10,/*terminal_threshold*/5,/*terminal_output_minlength*/0,
+ genome,genomealt,mode,/*maxpaths_search*/10,/*terminal_threshold*/5,/*reject_trimlength*/1000,
splicesites,splicetypes,splicedists,nsplicesites,
- novelsplicingp,knownsplicingp,distances_observed_p,
+ novelsplicingp,knownsplicingp,/*find_dna_chimeras_p*/false,distances_observed_p,
subopt_levels,max_middle_insertions,max_middle_deletions,
shortsplicedist,shortsplicedist_known,shortsplicedist_novelend,min_intronlength,
min_distantsplicing_end_matches,min_distantsplicing_identity,
@@ -1222,27 +1233,29 @@ main (int argc, char *argv[]) {
trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max);
Oligoindex_hr_setup(Genome_blocks(genome),/*mode*/STANDARD);
Stage2_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,/*cross_species_p*/false,
- suboptimal_score_start,suboptimal_score_end,
+ suboptimal_score_start,suboptimal_score_end,sufflookback,nsufflookback,maxintronlen,
mode,/*snps_p*/snps_iit ? true : false);
Pair_setup(trim_mismatch_score,trim_indel_score,/*gff3_separators_p*/false,/*sam_insert_0M_p*/false,
/*force_xs_direction_p*/false,/*md_lowercase_variant_p*/false,
- /*snps_p*/snps_iit ? true : false,
- Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
- /*cigar_action*/CIGAR_ACTION_IGNORE);
+ /*snps_p*/snps_iit ? true : false,/*print_nsnpdiffs_p*/snps_iit ? true : false,
+ Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false));
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
/*require_splicedir_p*/false,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,
splicesites,min_intronlength,max_deletionlength,/*min_indel_end_matches*/6,
- /*output_sam_p*/false,/*homopolymerp*/false,/*stage3debug*/NO_STAGE3DEBUG);
- Stage3hr_setup(/*invert_first_p*/false,/*invert_second_p*/false,genes_iit,genes_divint_crosstable,
+ maxpeelback_distalmedial,nullgap,extramaterial_end,extramaterial_paired,
+ extraband_single,extraband_end,extraband_paired,
+ ngap,maxintronlen,/*output_sam_p*/false,/*homopolymerp*/false,
+ /*stage3debug*/NO_STAGE3DEBUG);
+ Stage3hr_setup(/*invert_first_p*/false,/*invert_second_p*/false,genome,
+ chromosome_iit,nchromosomes,circular_typeint,genes_iit,genes_divint_crosstable,
/*tally_iit*/NULL,/*tally_divint_crosstable*/NULL,
- /*runlength_iit*/NULL,/*runlength_divint_crosstable*/NULL,/*terminal_output_minlength*/0,
+ /*runlength_iit*/NULL,/*runlength_divint_crosstable*/NULL,/*reject_trimlength*/1000,
distances_observed_p,pairmax,expected_pairlength,pairlength_deviation,
localsplicing_penalty,indel_penalty_middle,antistranded_penalty,
favor_multiexon_p,gmap_min_nconsecutive,index1part,index1interval,
- novelsplicingp,/*merge_samechr_p*/false,circularp,
- /*fails_as_input_p*/false,/*fastq_format_p*/false,/*print_m8_p*/false,
- /*want_random_p*/true);
+ novelsplicingp,/*merge_samechr_p*/false,circularp,/*failedinput_root*/NULL,
+ /*print_m8_p*/false,/*want_random_p*/true);
uniqueness_scan(from_right_p);
diff --git a/src/univdiag.c b/src/univdiag.c
new file mode 100644
index 0000000..869bafd
--- /dev/null
+++ b/src/univdiag.c
@@ -0,0 +1,116 @@
+static char rcsid[] = "$I$";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include "univdiag.h"
+#include "univdiagdef.h"
+#include "mem.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+
+#define T Univdiag_T
+
+
+T
+Univdiag_new (int querystart, int queryend, Univcoord_T univdiagonal) {
+ T new = (T) MALLOC(sizeof(*new));
+
+ new->univdiagonal = univdiagonal;
+ new->querystart = querystart;
+ new->queryend = queryend;
+ new->nconsecutive = queryend - querystart + 1;
+ new->nmismatches_known_p = true;
+
+ new->intscore = 0;
+ new->nlinked = 0;
+ new->prev = (Univdiag_T) NULL;
+
+ return new;
+}
+
+
+T
+Univdiag_new_fillin (int querystart, int queryend, int indexsize, Univcoord_T univdiagonal) {
+ T new = (T) MALLOC(sizeof(*new));
+
+ new->univdiagonal = univdiagonal;
+ new->querystart = querystart;
+ new->queryend = queryend + indexsize - 1;
+ new->nconsecutive = new->queryend - querystart + 1;
+ new->nmismatches_known_p = false;
+
+ new->intscore = 0;
+ new->nlinked = 0;
+ new->prev = (Univdiag_T) NULL;
+
+ return new;
+}
+
+
+void
+Univdiag_free (T *old) {
+ FREE(*old);
+ return;
+}
+
+void
+Univdiag_gc (List_T *list) {
+ T univdiagonal;
+ List_T p;
+
+ for (p = *list; p != NULL; p = List_next(p)) {
+ univdiagonal = (T) List_head(p);
+ FREE(univdiagonal);
+ }
+ List_free(&(*list));
+ return;
+}
+
+
+int
+Univdiag_ascending_cmp (const void *a, const void *b) {
+ T x = * (T *) a;
+ T y = * (T *) b;
+
+ if (x->querystart < y->querystart) {
+ return -1;
+ } else if (y->querystart < x->querystart) {
+ return +1;
+ } else if (x->queryend < y->queryend) {
+ return -1;
+ } else if (y->queryend < x->queryend) {
+ return +1;
+ } else if (x->univdiagonal < y->univdiagonal) {
+ return -1;
+ } else if (y->univdiagonal < x->univdiagonal) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+
+int
+Univdiag_descending_cmp (const void *a, const void *b) {
+ T x = * (T *) a;
+ T y = * (T *) b;
+
+ if (x->querystart > y->querystart) {
+ return -1;
+ } else if (y->querystart > x->querystart) {
+ return +1;
+ } else if (x->queryend > y->queryend) {
+ return -1;
+ } else if (y->queryend > x->queryend) {
+ return +1;
+ } else if (x->univdiagonal > y->univdiagonal) {
+ return -1;
+ } else if (y->univdiagonal > x->univdiagonal) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+
+
diff --git a/src/univdiag.h b/src/univdiag.h
new file mode 100644
index 0000000..20022c5
--- /dev/null
+++ b/src/univdiag.h
@@ -0,0 +1,31 @@
+/* $Id: univdiag.h 166641 2015-05-29 21:13:04Z twu $ */
+#ifndef UNIVDIAG_INCLUDED
+#define UNIVDIAG_INCLUDED
+
+#include "bool.h"
+#include "list.h"
+#include "genomicpos.h"
+#include "types.h"
+
+#define T Univdiag_T
+typedef struct T *T;
+
+
+extern T
+Univdiag_new (int querystart, int queryend, Univcoord_T univdiagonal);
+extern T
+Univdiag_new_fillin (int querystart, int queryend, int indexsize, Univcoord_T univdiagonal);
+extern void
+Univdiag_free (T *old);
+extern void
+Univdiag_gc (List_T *list);
+
+extern int
+Univdiag_ascending_cmp (const void *a, const void *b);
+extern int
+Univdiag_descending_cmp (const void *a, const void *b);
+
+#undef T
+#endif
+
+
diff --git a/src/univdiagdef.h b/src/univdiagdef.h
new file mode 100644
index 0000000..5f71768
--- /dev/null
+++ b/src/univdiagdef.h
@@ -0,0 +1,22 @@
+/* $Id: univdiagdef.h 166641 2015-05-29 21:13:04Z twu $ */
+#ifndef UNIVDIAGDEF_INCLUDED
+#define UNIVDIAGDEF_INCLUDED
+
+#include "bool.h"
+
+#define T Univdiag_T
+struct T {
+ Univcoord_T univdiagonal; /* Used by sarray-read.c */
+ int querystart;
+ int queryend;
+ int nconsecutive;
+ bool nmismatches_known_p;
+
+ int intscore; /* Used for dynamic programming of diagonals in sarray-read.c */
+ int nlinked; /* Used for dynamic programming of diagonals in sarray-read.c */
+ struct T *prev; /* Used for dynamic programming of diagonals in sarray-read.c */
+};
+
+#undef T
+#endif
+
diff --git a/src/univinterval.h b/src/univinterval.h
index a533855..a0c6877 100644
--- a/src/univinterval.h
+++ b/src/univinterval.h
@@ -1,6 +1,7 @@
-/* $Id: univinterval.h 102893 2013-07-25 22:11:12Z twu $ */
+/* $Id: univinterval.h 157221 2015-01-22 18:38:57Z twu $ */
#ifndef UNIVINTERVAL_INCLUDED
#define UNIVINTERVAL_INCLUDED
+
#include "bool.h"
#include "genomicpos.h"
#include "types.h"
diff --git a/tests/Makefile.in b/tests/Makefile.in
index 6277d56..23a693e 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -98,9 +98,6 @@ EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GMAPDB = @GMAPDB@
-GOBY_CFLAGS = @GOBY_CFLAGS@
-GOBY_LDFLAGS = @GOBY_LDFLAGS@
-GOBY_LIBS = @GOBY_LIBS@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
@@ -120,6 +117,7 @@ MAX_READLENGTH = @MAX_READLENGTH@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
+MPI_CFLAGS = @MPI_CFLAGS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
diff --git a/util/Makefile.in b/util/Makefile.in
index bf4f035..2a662a2 100644
--- a/util/Makefile.in
+++ b/util/Makefile.in
@@ -133,9 +133,6 @@ EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GMAPDB = @GMAPDB@
-GOBY_CFLAGS = @GOBY_CFLAGS@
-GOBY_LDFLAGS = @GOBY_LDFLAGS@
-GOBY_LIBS = @GOBY_LIBS@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
@@ -155,6 +152,7 @@ MAX_READLENGTH = @MAX_READLENGTH@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
+MPI_CFLAGS = @MPI_CFLAGS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
diff --git a/util/gmap_build.pl.in b/util/gmap_build.pl.in
index a03d8bd..e348413 100644
--- a/util/gmap_build.pl.in
+++ b/util/gmap_build.pl.in
@@ -1,5 +1,5 @@
#! @PERL@
-# $Id: gmap_build.pl.in 153958 2014-11-24 17:56:42Z twu $
+# $Id: gmap_build.pl.in 167264 2015-06-10 23:59:39Z twu $
use warnings;
@@ -19,7 +19,7 @@ my $sampling = 3;
my $sleeptime = 2;
GetOptions(
- 'no-sarray' => \$skip_sarray_p, # skip suffix array
+ 'build-sarray=s' => \$build_sarray_p, # build suffix array
'B=s' => \$bindir, # binary directory
'T=s' => \$builddir, # temporary build directory
@@ -118,10 +118,14 @@ if (defined($nmessages)) {
$nmessages_flag = "";
}
-if (defined($skip_sarray_p)) {
+if (!defined($build_sarray_p)) {
+ $sarrayp = 1; # default is to build the suffix array
+} elsif ($build_sarray_p eq "0") {
$sarrayp = 0;
-} else {
+} elsif ($build_sarray_p eq "1") {
$sarrayp = 1;
+} else {
+ die "Argument to --build-sarray needs to be 0 or 1";
}
if (defined($contigs_mapped_p)) {
@@ -338,7 +342,7 @@ sub create_index_offsets {
my ($index_cmd, $compression_flag, $genomecompfile) = @_;
my ($cmd, $rc);
- $cmd = "cat $genomecompfile | $index_cmd -O $compression_flag";
+ $cmd = "$index_cmd -O $compression_flag $genomecompfile";
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -351,7 +355,7 @@ sub create_index_positions {
my ($index_cmd, $genomecompfile) = @_;
my ($cmd, $rc);
- $cmd = "cat $genomecompfile | $index_cmd -P";
+ $cmd = "$index_cmd -P $genomecompfile";
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -380,6 +384,14 @@ sub make_enhanced_suffix_array {
}
sleep($sleeptime);
+ # Compressed suffix array
+ # $cmd = "$bindir/gmapindex -d $dbname -F $dbdir -D $dbdir -C";
+ # print STDERR "Running $cmd\n";
+ # if (($rc = system($cmd)) != 0) {
+ # die "$cmd failed with return code $rc";
+ # }
+ # sleep($sleeptime);
+
return;
}
@@ -435,7 +447,7 @@ Options:
-e, --nmessages=INT Maximum number of messages (warnings, contig reports) to report (default 50)
- --no-sarray Skip build of suffix array
+ --build-sarray=INT Whether to build suffix array: 0=no, 1=yes (default)
Obsolete options:
-T STRING Temporary build directory (may need to specify if you run out of space in your current directory)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list